numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
import os
|
|
6
|
+
import multiprocessing as mp
|
|
7
|
+
import warnings
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
from numba.cuda.core.config import IS_WIN32
|
|
11
|
+
from numba.cuda.core.errors import NumbaWarning
|
|
12
|
+
from numba.cuda.cudadrv import nvvm
|
|
13
|
+
from numba.cuda.testing import (
|
|
14
|
+
unittest,
|
|
15
|
+
skip_on_cudasim,
|
|
16
|
+
skip_unless_conda_cudatoolkit,
|
|
17
|
+
)
|
|
18
|
+
from numba.cuda.cuda_paths import (
|
|
19
|
+
_get_libdevice_path_decision,
|
|
20
|
+
_get_nvvm_path_decision,
|
|
21
|
+
_get_cudalib_dir_path_decision,
|
|
22
|
+
get_system_ctk,
|
|
23
|
+
get_system_ctk_libdir,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Evaluated once at import time: whether NVVM reports itself as available.
# The lookup tests below use it to pick the expected default decision.
has_cuda = nvvm.is_available()

# The fixtures create their child process through mp.get_context(); tests are
# skipped when the running interpreter's multiprocessing lacks that function.
has_mp_get_context = hasattr(mp, "get_context")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class LibraryLookupBase(unittest.TestCase):
    """Fixture that runs lookup actions inside a spawned child process.

    ``setUp`` starts a daemon child (``spawn`` start method) executing
    ``check_lib_lookup`` and wires two queues to it: ``qsend`` carries
    actions to the child, ``qrecv`` carries the child's replies back.
    Subclasses define actions as static methods and run them through
    ``remote_do``.
    """

    def setUp(self):
        """Create the two queues and start the child process."""
        ctx = mp.get_context("spawn")

        qrecv = ctx.Queue()
        qsend = ctx.Queue()
        self.qsend = qsend
        self.qrecv = qrecv
        # The child's output queue is our receive queue and vice versa.
        self.child_process = ctx.Process(
            target=check_lib_lookup,
            args=(qrecv, qsend),
            daemon=True,
        )
        self.child_process.start()

    def tearDown(self):
        """Ask the child to stop and make sure it really has exited."""
        self.qsend.put(self.do_terminate)
        self.child_process.join(3)
        if self.child_process.is_alive():
            # The child ignored the terminate request within the grace
            # period; stop it forcibly so it cannot outlive this test.
            self.child_process.terminate()
            self.child_process.join(3)
        # Ensure the process is terminated: exitcode stays None for as long
        # as the process has not finished.
        self.assertIsNotNone(self.child_process.exitcode)

    def remote_do(self, action):
        """Run ``action`` in the child process and return its reply.

        The reply is whatever the child put on ``qrecv``. The test fails if
        that reply is an exception instance, which is how the child reports
        an error.
        """
        self.qsend.put(action)
        out = self.qrecv.get()
        self.assertNotIsInstance(out, BaseException)
        return out

    @staticmethod
    def do_terminate():
        """Action that tells the child loop to stop (false status, no result)."""
        return False, None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def remove_env(name):
    """Delete environment variable ``name`` from ``os.environ``.

    Returns True when the variable existed and was removed, False when it
    was not set in the first place.
    """
    # A private sentinel distinguishes "not set" from any real value.
    not_set = object()
    return os.environ.pop(name, not_set) is not not_set
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def check_lib_lookup(qout, qin):
    """Child-process loop: run actions received on ``qin``, reply on ``qout``.

    Each action is a zero-argument callable returning ``(status, result)``.
    ``result`` is a tuple; the list of warnings recorded while the action ran
    is appended to it as a final element before it is sent back on ``qout``.
    The loop ends once ``status`` is false or anything raises; in the latter
    case the exception object itself is sent on ``qout``.

    An action returning ``(False, None)`` is a pure terminate request: the
    loop ends without sending any reply.
    """
    status = True
    while status:
        try:
            action = qin.get()
        except Exception as e:
            qout.put(e)
            status = False
        else:
            try:
                # Record every NumbaWarning raised by the action so the
                # parent can inspect them alongside the result.
                with warnings.catch_warnings(record=True) as w:
                    warnings.simplefilter("always", NumbaWarning)
                    status, result = action()
                # A terminate request carries no result; concatenating None
                # with a tuple would raise TypeError and push a spurious
                # exception onto the reply queue during shutdown.
                if status or result is not None:
                    qout.put(result + (w,))
            except Exception as e:
                qout.put(e)
                status = False
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@skip_on_cudasim("Library detection unsupported in the simulator")
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
class TestLibDeviceLookUp(LibraryLookupBase):
    """Checks which source the libdevice path decision reports.

    Every action runs in the child process started by the base fixture, so
    environment changes made by one action are still in effect for the next.
    """

    def test_libdevice_path_decision(self):
        # With CUDA_HOME/CUDA_PATH cleared, the conda environment is the
        # expected default source.
        source, location, caught = self.remote_do(self.do_clear_envs)
        if not has_cuda:
            self.assertEqual(source, "<unknown>")
            self.assertIsNone(location)
        else:
            self.assertEqual(source, "Conda environment")
        self.assertFalse(caught)

        # Pointing CUDA_HOME at a fake toolkit (and faking a non-conda
        # environment) must switch the decision to CUDA_HOME.
        source, location, caught = self.remote_do(self.do_set_cuda_home)
        self.assertEqual(source, "CUDA_HOME")
        libdevice_prefix = os.path.join("mycudahome", "nvvm", "libdevice")
        self.assertTrue(location.startswith(libdevice_prefix))
        self.assertFalse(caught)

        # Clearing the variables again leaves either the system toolkit or
        # nothing at all, depending on whether a system toolkit is present.
        no_system_ctk = get_system_ctk("nvvm", "libdevice") is None
        source, location, caught = self.remote_do(self.do_clear_envs)
        if no_system_ctk:
            # Conda was faked away above, so no cudatoolkit is available.
            self.assertEqual(source, "<unknown>")
            self.assertIsNone(location)
        else:
            # Use system available cudatoolkit
            self.assertEqual(source, "System")
        self.assertFalse(caught)

    @staticmethod
    def do_clear_envs():
        """Unset CUDA_HOME and CUDA_PATH, then report the path decision."""
        for variable in ("CUDA_HOME", "CUDA_PATH"):
            remove_env(variable)
        return True, _get_libdevice_path_decision()

    @staticmethod
    def do_set_cuda_home():
        """Set CUDA_HOME to a fake location, then report the path decision."""
        os.environ["CUDA_HOME"] = "mycudahome"
        _fake_non_conda_env()
        return True, _get_libdevice_path_decision()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@skip_on_cudasim("Library detection unsupported in the simulator")
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
class TestNvvmLookUp(LibraryLookupBase):
    """Checks the source reported by ``_get_nvvm_path_decision``.

    Each probe is a static method handed to ``self.remote_do`` (provided by
    the base class); the probe returns ``(True, decision)`` and the test
    unpacks what comes back as ``(by, info, warnings)``.
    """

    def test_nvvm_path_decision(self):
        # With CUDA_HOME / CUDA_PATH removed, the conda environment is the
        # expected default source.
        source, path, caught = self.remote_do(self.do_clear_envs)
        if not has_cuda:
            self.assertEqual(source, "<unknown>")
            self.assertIsNone(path)
        else:
            self.assertEqual(source, "Conda environment")
        self.assertFalse(caught)

        # CUDA_HOME has to be picked up once the conda environment is hidden.
        source, path, caught = self.remote_do(self.do_set_cuda_home)
        self.assertEqual(source, "CUDA_HOME")
        self.assertFalse(caught)
        # The library directory name differs between Windows and the rest.
        lib_subdir = "bin" if IS_WIN32 else "lib64"
        self.assertEqual(
            os.path.dirname(path),
            os.path.join("mycudahome", "nvvm", lib_subdir),
        )

        # The last probe depends on whether a system-wide toolkit exists.
        system_nvvm = get_system_ctk("nvvm")
        source, path, caught = self.remote_do(self.do_clear_envs)
        if system_nvvm is None:
            # Fake remove conda environment so no cudatoolkit is available
            self.assertEqual(source, "<unknown>")
            self.assertIsNone(path)
        else:
            # Use system available cudatoolkit
            self.assertEqual(source, "System")
        self.assertFalse(caught)

    @staticmethod
    def do_clear_envs():
        # Drop both toolkit location variables before asking for a decision.
        for variable in ("CUDA_HOME", "CUDA_PATH"):
            remove_env(variable)
        return True, _get_nvvm_path_decision()

    @staticmethod
    def do_set_cuda_home():
        # Point CUDA_HOME at a made-up directory and hide the conda prefix.
        fake_home = os.path.join("mycudahome")
        os.environ["CUDA_HOME"] = fake_home
        _fake_non_conda_env()
        decision = _get_nvvm_path_decision()
        return True, decision
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@skip_on_cudasim("Library detection unsupported in the simulator")
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
class TestCudaLibLookUp(LibraryLookupBase):
    """Checks the source reported by ``_get_cudalib_dir_path_decision``.

    Each probe is a static method handed to ``self.remote_do`` (provided by
    the base class); the probe returns ``(True, decision)`` and the test
    unpacks what comes back as ``(by, info, warnings)``.
    """

    def test_cudalib_path_decision(self):
        # With CUDA_HOME / CUDA_PATH removed, the conda environment is the
        # expected default source.
        source, path, caught = self.remote_do(self.do_clear_envs)
        if not has_cuda:
            self.assertEqual(source, "<unknown>")
            self.assertIsNone(path)
        else:
            self.assertEqual(source, "Conda environment")
        self.assertFalse(caught)

        # CUDA_HOME has to be picked up once the conda environment is hidden.
        # The clearing probe is run once more first and its answer discarded.
        self.remote_do(self.do_clear_envs)
        source, path, caught = self.remote_do(self.do_set_cuda_home)
        self.assertEqual(source, "CUDA_HOME")
        self.assertFalse(caught)
        if not IS_WIN32:
            self.assertEqual(path, os.path.join("mycudahome", "lib64"))
        else:
            # I think only wheels don't have the "Library" directory?
            windows_candidates = (
                os.path.join("mycudahome", "bin"),
                os.path.join("mycudahome", "Library", "bin"),
            )
            self.assertTrue(path in windows_candidates)

        # The last probe depends on whether a system-wide toolkit exists.
        system_libdir = get_system_ctk_libdir()
        source, path, caught = self.remote_do(self.do_clear_envs)
        if system_libdir is None:
            # Fake remove conda environment so no cudatoolkit is available
            self.assertEqual(source, "<unknown>")
            self.assertIsNone(path)
        else:
            # Use system available cudatoolkit
            self.assertEqual(source, "System")
        self.assertFalse(caught)

    @staticmethod
    def do_clear_envs():
        # Drop both toolkit location variables before asking for a decision.
        for variable in ("CUDA_HOME", "CUDA_PATH"):
            remove_env(variable)
        return True, _get_cudalib_dir_path_decision()

    @staticmethod
    def do_set_cuda_home():
        # Point CUDA_HOME at a made-up directory and hide the conda prefix.
        fake_home = os.path.join("mycudahome")
        os.environ["CUDA_HOME"] = fake_home
        _fake_non_conda_env()
        decision = _get_cudalib_dir_path_decision()
        return True, decision
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _fake_non_conda_env():
    """
    Blank out ``sys.prefix`` so the process no longer looks like it is
    running inside a conda environment.
    """
    setattr(sys, "prefix", "")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.cudadrv import nvvm
|
|
5
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
6
|
+
from numba.cuda import utils
|
|
7
|
+
|
|
8
|
+
from llvmlite import ir
|
|
9
|
+
from llvmlite import binding as llvm
|
|
10
|
+
|
|
11
|
+
import unittest
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# LLVM IR memset call whose pointer argument carries an ``align 4``
# attribute.
original = (
    "call void @llvm.memset.p0i8.i64(i8* align 4 %arg.x.41, "
    "i8 0, i64 %0, i1 false)"
)

# The same call with the ``align 4`` attribute left out.
missing_align = (
    "call void @llvm.memset.p0i8.i64(i8* %arg.x.41, "
    "i8 0, i64 %0, i1 false)"
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@skip_on_cudasim("libNVVM not supported in simulator")
@unittest.skipIf(utils.MACHINE_BITS == 32, "CUDA not support for 32-bit")
@unittest.skipIf(not nvvm.is_available(), "No libNVVM")
class TestNvvmWithoutCuda(unittest.TestCase):
    """Drives libNVVM directly through ``nvvm.compile_ir``."""

    def test_nvvm_accepts_encoding(self):
        # NVVM must accept a constant holding every possible 8-bit value.
        # The case originates from llvmlite PR #53:
        #
        #     https://github.com/numba/llvmlite/pull/53
        #
        # It lives here to make sure the text encoding llvmlite uses (e.g.
        # utf-8, latin1, etc.) never yields IR that NVVM fails to parse.

        # Build a module whose only global is a 256-byte constant array
        # containing the values 0..255.
        byte_array_type = ir.ArrayType(ir.IntType(8), 256)
        every_byte = bytearray(range(256))
        constant = ir.Constant(byte_array_type, every_byte)

        module = ir.Module()
        module.triple = "nvptx64-nvidia-cuda"
        nvvm.add_ir_version(module)

        global_var = ir.GlobalVariable(module, constant.type, "myconstant")
        global_var.global_constant = True
        global_var.initializer = constant
        module.data_layout = nvvm.NVVM().data_layout

        # Round-trip the textual IR through LLVM's parser, then compile the
        # parsed module with NVVM.
        parsed = llvm.parse_assembly(str(module))
        ptx = nvvm.compile_ir(str(parsed))

        # Every value has to show up in the emitted constant array.
        elements = ", ".join(map(str, range(256)))
        myconstant = f"myconstant[256] = {{{elements}}}".encode("utf-8")
        self.assertIn(myconstant, ptx)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.tests import load_testsuite
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_tests(loader, tests, pattern):
    """unittest ``load_tests`` hook for this package.

    Delegates to ``load_testsuite`` with the directory containing this file;
    ``tests`` and ``pattern`` are accepted for the protocol but not used.
    """
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import unittest
|
|
9
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
10
|
+
from numba.cuda.tests.support import run_in_subprocess, override_config
|
|
11
|
+
from numba.cuda import get_current_device
|
|
12
|
+
from numba.cuda.cudadrv.nvrtc import compile
|
|
13
|
+
from numba.cuda import types
|
|
14
|
+
from numba.cuda.typing import signature
|
|
15
|
+
from numba import cuda
|
|
16
|
+
from numba.cuda import config
|
|
17
|
+
from numba.cuda.typing.templates import AbstractTemplate
|
|
18
|
+
from numba.cuda.cudadrv.linkable_code import (
|
|
19
|
+
CUSource,
|
|
20
|
+
PTXSource,
|
|
21
|
+
Fatbin,
|
|
22
|
+
Cubin,
|
|
23
|
+
Archive,
|
|
24
|
+
Object,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
|
|
28
|
+
|
|
29
|
+
# Everything below needs the real CUDA target, so it is skipped entirely when
# the simulator is enabled.
if not config.ENABLE_CUDASIM:
    from numba.cuda.memory_management.nrt import rtsys, get_include
    from numba.cuda.cudadecl import registry as cuda_decl_registry
    from numba.cuda.cudaimpl import lower as cuda_lower

    def allocate_deallocate_handle():
        """
        Handle to call NRT_Allocate and NRT_Free
        """

    @cuda_decl_registry.register_global(allocate_deallocate_handle)
    class AllocateShimImpl(AbstractTemplate):
        """Typing template registered for ``allocate_deallocate_handle``."""

        def generic(self, args, kws):
            # Whatever the call site passes, the handle is typed as
            # returning void.
            return signature(types.void)

    # Declaration of the external device function, typed as returning int32
    # with no arguments.
    device_fun_shim = cuda.declare_device(
        "device_allocate_deallocate", types.int32()
    )

    # wrapper to turn the above into a python callable
    def call_device_fun_shim():
        return device_fun_shim()

    @cuda_lower(allocate_deallocate_handle)
    def allocate_deallocate_impl(context, builder, sig, args):
        # Lower the handle by compiling the Python wrapper around the
        # declared device function with an int32() signature and no
        # arguments, and hand back whatever that yields.
        shim_sig = types.int32()
        return context.compile_internal(
            builder, call_device_fun_shim, shim_sig, ()
        )

    if TEST_BIN_DIR:

        def make_linkable_code(name, kind, mode):
            """Read ``name`` from TEST_BIN_DIR and wrap its contents.

            ``kind`` is called with the file contents and ``nrt=True``;
            ``mode`` is the mode string passed to ``open``.
            """
            location = os.path.join(TEST_BIN_DIR, name)
            with open(location, mode) as stream:
                payload = stream.read()
            return kind(payload, nrt=True)

        # One linkable-code object per file in the test binary directory.
        nrt_extern_a = make_linkable_code("nrt_extern.a", Archive, "rb")
        nrt_extern_cubin = make_linkable_code("nrt_extern.cubin", Cubin, "rb")
        nrt_extern_cu = make_linkable_code("nrt_extern.cu", CUSource, "rb")
        nrt_extern_fatbin = make_linkable_code(
            "nrt_extern.fatbin",
            Fatbin,
            "rb",
        )
        nrt_extern_fatbin_multi = make_linkable_code(
            "nrt_extern_multi.fatbin",
            Fatbin,
            "rb",
        )
        nrt_extern_o = make_linkable_code("nrt_extern.o", Object, "rb")
        nrt_extern_ptx = make_linkable_code("nrt_extern.ptx", PTXSource, "rb")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestNrtBasic(CUDATestCase):
    """Basic NRT tests, each run with ``CUDA_ENABLE_NRT`` overridden to True."""

    def run(self, result=None):
        # Keep the NRT override active for the whole test, and pass the
        # TestResult back to the caller as TestCase.run() is documented to do.
        with override_config("CUDA_ENABLE_NRT", True):
            return super().run(result)

    def test_nrt_launches(self):
        @cuda.jit
        def f(x):
            return x[:5]

        @cuda.jit
        def g():
            x = np.empty(10, np.int64)
            f(x)

        g[1, 1]()
        cuda.synchronize()

    @skip_on_cudasim("CUDA Simulator does not produce PTX")
    def test_nrt_ptx_contains_refcount(self):
        @cuda.jit
        def f(x):
            return x[:5]

        @cuda.jit
        def g():
            x = np.empty(10, np.int64)
            f(x)

        g[1, 1]()

        # Take the PTX of the first (only) compiled specialization.
        ptx = next(iter(g.inspect_asm().values()))

        # The following checks that a `call` PTX instruction is
        # emitted for NRT_MemInfo_alloc_aligned, NRT_incref and
        # NRT_decref. assertRegex performs the same re.search as before but
        # is not stripped under -O and reports the pattern on failure.
        p1 = r"call\.uni(.|\n)*NRT_MemInfo_alloc_aligned"
        self.assertRegex(ptx, p1)

        p2 = r"call\.uni.*\n?.*NRT_incref"
        self.assertRegex(ptx, p2)

        p3 = r"call\.uni.*\n?.*NRT_decref"
        self.assertRegex(ptx, p3)

    def test_nrt_returns_correct(self):
        @cuda.jit
        def f(x):
            return x[5:]

        @cuda.jit
        def g(out_ary):
            x = np.empty(10, np.int64)
            x[5] = 1
            y = f(x)
            out_ary[0] = y[0]

        out_ary = np.zeros(1, dtype=np.int64)

        g[1, 1](out_ary)

        # x[5] was set to 1 and f returns x[5:], so y[0] must be 1.
        self.assertEqual(out_ary[0], 1)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class TestNrtLinking(CUDATestCase):
    """Links external code flagged ``nrt=True`` into kernels.

    Each test runs with ``CUDA_ENABLE_NRT`` overridden to True.
    """

    def run(self, result=None):
        # Keep the NRT override active for the whole test, and pass the
        # TestResult back to the caller as TestCase.run() is documented to do.
        with override_config("CUDA_ENABLE_NRT", True):
            return super().run(result)

    @skip_on_cudasim("CUDA Simulator does not link PTX")
    def test_nrt_detect_linked_ptx_file(self):
        # Compile a small CUDA source that calls NRT_Allocate / NRT_Free to
        # PTX, then link that PTX into a kernel.
        src = f"#include <{get_include()}/nrt.cuh>"
        src += """
                 extern "C" __device__ int device_allocate_deallocate(int* nb_retval){
                     auto ptr = NRT_Allocate(1);
                     NRT_Free(ptr);
                     return 0;
                 }
        """
        cc = get_current_device().compute_capability
        ptx, _ = compile(src, "external_nrt.cu", cc)

        @cuda.jit(link=[PTXSource(ptx.code, nrt=True)])
        def kernel():
            allocate_deallocate_handle()

        kernel[1, 1]()

    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    @skip_on_cudasim("CUDA Simulator does not link code")
    def test_nrt_detect_linkable_code(self):
        # One sub-test per kind of pre-built linkable code object.
        codes = (
            nrt_extern_a,
            nrt_extern_cubin,
            nrt_extern_cu,
            nrt_extern_fatbin,
            nrt_extern_fatbin_multi,
            nrt_extern_o,
            nrt_extern_ptx,
        )
        for code in codes:
            with self.subTest(code=code):

                @cuda.jit(link=[code])
                def kernel():
                    allocate_deallocate_handle()

                kernel[1, 1]()
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@skip_on_cudasim("CUDASIM does not have NRT statistics")
class TestNrtStatistics(CUDATestCase):
    """Tests for the NRT allocation statistics exposed through ``rtsys``."""

    def setUp(self):
        self._stream = cuda.default_stream()
        # Remember whether stats were enabled so tearDown can restore it.
        self.__stats_state = rtsys.memsys_stats_enabled(self._stream)

    def tearDown(self):
        # Put the stats switch back to what it was before the test ran.
        restore = (
            rtsys.memsys_enable_stats
            if self.__stats_state
            else rtsys.memsys_disable_stats
        )
        restore(self._stream)

    def test_stats_env_var_explicit_on(self):
        # Explicitly enabling the stats through the env var must work.
        src = """if 1:
        from numba import cuda
        from numba.cuda.memory_management import rtsys
        import numpy as np

        @cuda.jit
        def foo():
            x = np.arange(10)[0]

        # initialize the NRT before use
        rtsys.initialize()
        assert rtsys.memsys_stats_enabled(), "Stats not enabled"
        orig_stats = rtsys.get_allocation_stats()
        foo[1, 1]()
        new_stats = rtsys.get_allocation_stats()
        total_alloc = new_stats.alloc - orig_stats.alloc
        total_free = new_stats.free - orig_stats.free
        total_mi_alloc = new_stats.mi_alloc - orig_stats.mi_alloc
        total_mi_free = new_stats.mi_free - orig_stats.mi_free

        expected = 1
        assert total_alloc == expected, \\
            f"total_alloc != expected, {total_alloc} != {expected}"
        assert total_free == expected, \\
            f"total_free != expected, {total_free} != {expected}"
        assert total_mi_alloc == expected, \\
            f"total_mi_alloc != expected, {total_mi_alloc} != {expected}"
        assert total_mi_free == expected, \\
            f"total_mi_free != expected, {total_mi_free} != {expected}"
        """

        # Run the snippet in a child process with both switches set.
        child_env = dict(
            os.environ,
            NUMBA_CUDA_NRT_STATS="1",
            NUMBA_CUDA_ENABLE_NRT="1",
        )
        run_in_subprocess(src, env=child_env)

    def check_env_var_off(self, env):
        # Shared helper: run a snippet in a child process started with
        # ``env`` that asserts the stats are reported as disabled.
        src = """if 1:
        from numba import cuda
        import numpy as np
        from numba.cuda.memory_management import rtsys

        @cuda.jit
        def foo():
            arr = np.arange(10)[0]

        assert rtsys.memsys_stats_enabled() == False
        try:
            rtsys.get_allocation_stats()
        except RuntimeError as e:
            assert "NRT stats are disabled." in str(e)
        """
        run_in_subprocess(src, env=env)

    def test_stats_env_var_explicit_off(self):
        # Explicitly disabling the stats through the env var must work.
        child_env = dict(os.environ, NUMBA_CUDA_NRT_STATS="0")
        self.check_env_var_off(child_env)

    def test_stats_env_var_default_off(self):
        # Leaving the env var unset must behave like "off", i.e. the
        # default for Numba is off.
        child_env = {
            key: value
            for key, value in os.environ.items()
            if key != "NUMBA_CUDA_NRT_STATS"
        }
        self.check_env_var_off(child_env)

    def test_stats_status_toggle(self):
        @cuda.jit
        def foo():
            tmp = np.ones(3)
            arr = np.arange(5 * tmp[0])  # noqa: F841
            return None

        with (
            override_config("CUDA_ENABLE_NRT", True),
            override_config("CUDA_NRT_STATS", True),
        ):
            # Start from a known state: stats on.
            rtsys.memsys_enable_stats()
            self.assertTrue(rtsys.memsys_stats_enabled())

            for _ in range(2):
                # Snapshot the counters, then switch them off.
                before = rtsys.get_allocation_stats()
                rtsys.memsys_disable_stats()
                self.assertFalse(rtsys.memsys_stats_enabled())

                # This launch would move the counters were they enabled.
                foo[1, 1]()

                # Switch back on; the snapshot should be unchanged.
                rtsys.memsys_enable_stats()
                self.assertTrue(rtsys.memsys_stats_enabled())
                after_disabled_run = rtsys.get_allocation_stats()

                # This launch runs with stats on and should move them.
                foo[1, 1]()
                after_enabled_run = rtsys.get_allocation_stats()

                # Nothing counted while off, something counted while on.
                self.assertEqual(before, after_disabled_run)
                self.assertLess(after_disabled_run, after_enabled_run)

    def test_rtsys_stats_query_raises_exception_when_disabled(self):
        # rtsys.get_allocation_stats() must raise while the stats counters
        # are turned off.
        rtsys.memsys_disable_stats()
        self.assertFalse(rtsys.memsys_stats_enabled())

        with self.assertRaises(RuntimeError) as ctx:
            rtsys.get_allocation_stats()

        self.assertIn("NRT stats are disabled.", str(ctx.exception))

    def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
        # Each memsys_get_stats_* function must raise when queried while
        # the stats counters are disabled.
        for meth in ("alloc", "free", "mi_alloc", "mi_free"):
            stats_func = getattr(rtsys, f"memsys_get_stats_{meth}")
            with self.subTest(stats_func=stats_func):
                # Turn stats off
                rtsys.memsys_disable_stats()
                self.assertFalse(rtsys.memsys_stats_enabled())
                with self.assertRaises(RuntimeError) as ctx:
                    stats_func()
                self.assertIn("NRT stats are disabled.", str(ctx.exception))

    def test_read_one_stat(self):
        @cuda.jit
        def foo():
            tmp = np.ones(3)
            arr = np.arange(5 * tmp[0])  # noqa: F841
            return None

        with (
            override_config("CUDA_ENABLE_NRT", True),
            override_config("CUDA_NRT_STATS", True),
        ):
            # Switch on stats
            rtsys.memsys_enable_stats()

            # Launch the kernel a couple of times to increase stats
            for _ in range(2):
                foo[1, 1]()

            # Read the combined struct first, then every individual
            # counter, before comparing anything.
            stats = rtsys.get_allocation_stats()
            counters = ("alloc", "mi_alloc", "free", "mi_free")
            individual = {
                name: getattr(rtsys, f"memsys_get_stats_{name}")()
                for name in counters
            }

            # Each individual counter must match its field in the struct.
            for name in counters:
                self.assertEqual(getattr(stats, name), individual[name])
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|