numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import platform
|
|
6
|
+
import shutil
|
|
7
|
+
import pytest
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from numba.cuda.utils import PYVERSION
|
|
10
|
+
from numba.cuda.cuda_paths import get_conda_ctk_libdir
|
|
11
|
+
from numba.cuda.cudadrv import driver, devices, libs
|
|
12
|
+
from numba.cuda.dispatcher import CUDADispatcher
|
|
13
|
+
from numba.cuda import config
|
|
14
|
+
from numba.cuda.tests.support import TestCase
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from typing import Iterable, Union
|
|
18
|
+
from io import StringIO
|
|
19
|
+
import unittest
|
|
20
|
+
import numpy as np
|
|
21
|
+
from numba.cuda import HAS_NUMBA
|
|
22
|
+
|
|
23
|
+
if PYVERSION >= (3, 10):
|
|
24
|
+
from filecheck.matcher import Matcher
|
|
25
|
+
from filecheck.options import Options
|
|
26
|
+
from filecheck.parser import Parser, pattern_for_opts
|
|
27
|
+
from filecheck.finput import FInput
|
|
28
|
+
|
|
29
|
+
# Directory that contains this module file.
numba_cuda_dir = Path(__file__).parent
|
|
30
|
+
# The "tests/data" directory located beneath numba_cuda_dir.
test_data_dir = numba_cuda_dir / "tests" / "data"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.mark.usefixtures("initialize_from_pytest_config")
|
|
34
|
+
class CUDATestCase(TestCase):
|
|
35
|
+
"""
|
|
36
|
+
For tests that use a CUDA device.
|
|
37
|
+
|
|
38
|
+
Methods assertFileCheckAsm and assertFileCheckLLVM will inspect a
|
|
39
|
+
CUDADispatcher and assert that the compilation artifacts match the
|
|
40
|
+
FileCheck checks given in the kernel's docstring.
|
|
41
|
+
|
|
42
|
+
Method assertFileCheckMatches can be used to assert that a given string
|
|
43
|
+
matches FileCheck checks, and is not specific to CUDADispatcher.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
# Machine epsilon of float16; presumably intended as the relative tolerance
# for float16 comparisons in tests (usage is outside this view - confirm).
FLOAT16_RTOL = np.finfo(np.float16).eps
|
|
47
|
+
|
|
48
|
+
def setUp(self):
    """
    Switch off two CUDA warning settings for the duration of a test.

    The values found in ``config`` are stored on the instance before
    being overwritten, so that they can be put back afterwards.
    """
    # Remember what was configured before this test changes anything.
    self._low_occupancy_warnings, self._warn_on_implicit_copy = (
        config.CUDA_LOW_OCCUPANCY_WARNINGS,
        config.CUDA_WARN_ON_IMPLICIT_COPY,
    )

    # The test suite should not warn about low GPU utilization, nor about
    # host arrays being passed to kernels.
    config.CUDA_LOW_OCCUPANCY_WARNINGS = config.CUDA_WARN_ON_IMPLICIT_COPY = 0
|
|
56
|
+
|
|
57
|
+
def tearDown(self):
    """
    Write the warning settings saved on the instance back to ``config``.
    """
    config.CUDA_LOW_OCCUPANCY_WARNINGS, config.CUDA_WARN_ON_IMPLICIT_COPY = (
        self._low_occupancy_warnings,
        self._warn_on_implicit_copy,
    )
|
|
60
|
+
|
|
61
|
+
# Type alias for a signature: a tuple of types, or None.
Signature = Union[tuple[type, ...], None]
|
|
62
|
+
|
|
63
|
+
def _getIRContents(
|
|
64
|
+
self,
|
|
65
|
+
ir_result: Union[dict[Signature, str], str],
|
|
66
|
+
signature: Union[Signature, None] = None,
|
|
67
|
+
) -> Iterable[str]:
|
|
68
|
+
if isinstance(ir_result, str):
|
|
69
|
+
assert signature is None, (
|
|
70
|
+
"Cannot use signature because the kernel was only compiled for one signature"
|
|
71
|
+
)
|
|
72
|
+
return [ir_result]
|
|
73
|
+
|
|
74
|
+
if signature is None:
|
|
75
|
+
return list(ir_result.values())
|
|
76
|
+
|
|
77
|
+
return [ir_result[signature]]
|
|
78
|
+
|
|
79
|
+
def assertFileCheckAsm(
    self,
    ir_producer: CUDADispatcher,
    signature: Union[tuple[type, ...], None] = None,
    check_prefixes: tuple[str, ...] = ("ASM",),
    **extra_filecheck_options,
) -> None:
    """
    Assert that the assembly output of the given CUDADispatcher matches
    the FileCheck checks given in the kernel's docstring.

    Args:
        ir_producer: Dispatcher whose ``inspect_asm()`` output is checked
            and whose ``__doc__`` supplies the check patterns.
        signature: Restrict the check to the output for one signature;
            None checks every output returned for the dispatcher.
        check_prefixes: The prefixes to use for the FileCheck checks.
        extra_filecheck_options: Forwarded unchanged to
            assertFileCheckMatches.

    Raises:
        AssertionError: If there is no assembly output, an output is
            empty, or the kernel has no docstring.
    """
    # Failures are raised explicitly rather than via ``assert`` so the
    # checks are not stripped when Python runs with -O.
    ir_contents = self._getIRContents(ir_producer.inspect_asm(), signature)
    if not ir_contents:
        raise AssertionError(
            "No assembly output found for the given signature."
        )

    check_patterns = ir_producer.__doc__
    if check_patterns is None:
        raise AssertionError(
            "Kernel docstring is required. To pass checks explicitly, use assertFileCheckMatches."
        )

    for ir_content in ir_contents:
        # Same guard as assertFileCheckLLVM: an empty output is reported
        # directly instead of being handed to FileCheck.
        if not ir_content:
            raise AssertionError(
                "Assembly content is empty for the given signature."
            )
        self.assertFileCheckMatches(
            ir_content,
            check_patterns=check_patterns,
            check_prefixes=check_prefixes,
            **extra_filecheck_options,
        )
|
|
103
|
+
|
|
104
|
+
def assertFileCheckLLVM(
    self,
    ir_producer: CUDADispatcher,
    signature: Union[tuple[type, ...], None] = None,
    check_prefixes: tuple[str] = ("LLVM",),
    **extra_filecheck_options,
) -> None:
    """
    Run FileCheck over the LLVM IR produced by ``ir_producer``.

    The check directives are read from the kernel's docstring. Every IR
    string selected by ``signature`` must be non-empty and is checked
    against them via ``assertFileCheckMatches``.
    """
    llvm_outputs = self._getIRContents(ir_producer.inspect_llvm(), signature)
    assert llvm_outputs, "No LLVM IR output found for the given signature."

    docstring_checks = ir_producer.__doc__
    assert docstring_checks is not None, (
        "Kernel docstring is required. To pass checks explicitly, use assertFileCheckMatches."
    )

    for llvm_text in llvm_outputs:
        assert llvm_text, "LLVM IR content is empty for the given signature."
        self.assertFileCheckMatches(
            llvm_text,
            check_patterns=docstring_checks,
            check_prefixes=check_prefixes,
            **extra_filecheck_options,
        )
|
|
131
|
+
|
|
132
|
+
def assertFileCheckMatches(
    self,
    ir_content: str,
    check_patterns: str,
    check_prefixes: tuple[str] = ("CHECK",),
    **extra_filecheck_options,
) -> None:
    """
    Assert that the given string matches the passed FileCheck checks.

    On a mismatch the test fails with FileCheck's captured stderr. When
    ``self._dump_failed_filechecks`` is set, the checked text and the check
    patterns are also written into a timestamped ``numba-ir-*`` directory
    (a relative path) and the failure message contains a command line that
    reproduces the failure.

    Args:
        ir_content: The string to check against.
        check_patterns: The FileCheck checks to use.
        check_prefixes: The prefixes to use for the FileCheck checks.
        extra_filecheck_options: Extra options to pass to FileCheck.
    """
    if PYVERSION < (3, 10):
        self.skipTest("FileCheck requires Python 3.10 or later")

    opts = Options(
        match_filename="-",
        check_prefixes=list(check_prefixes),
        **extra_filecheck_options,
    )
    input_file = FInput(fname="-", content=ir_content)
    parser = Parser(opts, StringIO(check_patterns), *pattern_for_opts(opts))
    matcher = Matcher(opts, input_file, parser)
    # Capture FileCheck's diagnostics so they can be put in the failure text.
    matcher.stderr = StringIO()

    if matcher.run() == 0:
        return

    if self._dump_failed_filechecks:
        dump_directory = Path(
            datetime.now().strftime("numba-ir-%Y_%m_%d_%H_%M_%S")
        )
        # exist_ok=True already makes this a no-op for an existing
        # directory, so no separate (race-prone) exists() check is needed.
        dump_directory.mkdir(parents=True, exist_ok=True)
        base_path = self.id().replace(".", "_")
        ir_dump = dump_directory / Path(base_path).with_suffix(".ll")
        checks_dump = dump_directory / Path(base_path).with_suffix(".checks")
        with (
            open(ir_dump, "w") as ir_file,
            open(checks_dump, "w") as checks_file,
        ):
            ir_file.write(ir_content + "\n")
            checks_file.write(check_patterns)
        dump_instructions = f"Reproduce with:\n\nfilecheck --check-prefixes={','.join(check_prefixes)} {checks_dump} --input-file {ir_dump}"
    else:
        dump_instructions = "Rerun with --dump-failed-filechecks to generate a reproducer."

    self.fail(
        f"FileCheck failed:\n{matcher.stderr.getvalue()}\n\n"
        + dump_instructions
    )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def skip_on_cudasim(reason):
    """Return a decorator that skips a test when the CUDA simulator is enabled.

    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    simulator_enabled = config.ENABLE_CUDASIM
    return unittest.skipIf(simulator_enabled, reason)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Decorator that skips a test unless HAS_NUMBA is true (the skip reason
# states that a base numba install is required).
skip_on_standalone_numba_cuda = unittest.skipUnless(
    HAS_NUMBA, "requires base numba install"
)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def skip_unless_cudasim(reason):
    """Return a decorator that skips a test unless the CUDA simulator is enabled.

    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    simulator_enabled = config.ENABLE_CUDASIM
    return unittest.skipUnless(simulator_enabled, reason)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def skip_unless_conda_cudatoolkit(reason):
    """Return a decorator that skips a test unless a Conda CUDA toolkit is found.

    The toolkit counts as found when ``get_conda_ctk_libdir()`` returns
    something other than None. ``reason`` must be a string.
    """
    assert isinstance(reason, str)
    conda_libdir = get_conda_ctk_libdir()
    has_conda_toolkit = conda_libdir is not None
    return unittest.skipUnless(has_conda_toolkit, reason)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def skip_if_external_memmgr(reason):
    """Return a decorator that skips a test when an EMM plugin is in use.

    A plugin is considered in use whenever ``config.CUDA_MEMORY_MANAGER`` is
    anything other than ``"default"``. ``reason`` must be a string.
    """
    assert isinstance(reason, str)
    plugin_in_use = config.CUDA_MEMORY_MANAGER != "default"
    return unittest.skipIf(plugin_in_use, reason)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def skip_under_cuda_memcheck(reason):
    """Return a decorator that skips a test when ``CUDA_MEMCHECK`` is set.

    Only the presence of the environment variable matters, not its value.
    ``reason`` must be a string.
    """
    assert isinstance(reason, str)
    memcheck_requested = "CUDA_MEMCHECK" in os.environ
    return unittest.skipIf(memcheck_requested, reason)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def skip_without_nvdisasm(reason):
    """Return a decorator that skips a test when ``nvdisasm`` is not on PATH.

    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    nvdisasm_missing = shutil.which("nvdisasm") is None
    return unittest.skipIf(nvdisasm_missing, reason)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def skip_with_nvdisasm(reason):
    """Return a decorator that skips a test when ``nvdisasm`` is on PATH.

    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    nvdisasm_present = shutil.which("nvdisasm") is not None
    return unittest.skipIf(nvdisasm_present, reason)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def skip_on_arm(reason):
    """Return a decorator that skips a test on ARM / AArch64 machines.

    Both ``platform.processor()`` and ``platform.machine()`` are consulted,
    case-insensitively. ``processor()`` returns an empty string when the
    value cannot be determined, so relying on it alone can miss ARM hosts.

    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    identifiers = (platform.processor(), platform.machine())
    is_arm = any(
        name.lower().startswith(("arm", "aarch")) for name in identifiers
    )
    return unittest.skipIf(is_arm, reason)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def skip_on_wsl2(reason):
    """Return a decorator that skips a test when running under WSL2.

    WSL2 is detected from the kernel release string, which typically
    contains "microsoft-standard-WSL2" on such systems. The comparison is
    case-insensitive. ``reason`` must be a string.
    """
    assert isinstance(reason, str)
    kernel_release = platform.release().lower()
    # "microsoft-standard-wsl2" itself contains "wsl2", so a single
    # substring test covers both spellings.
    return unittest.skipIf("wsl2" in kernel_release, reason)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def skip_if_cuda_includes_missing(fn):
    """Skip ``fn`` when ``cuda.h`` cannot be found in the CUDA include dir.

    The presence of ``cuda.h`` is used as an indicator of whether the CUDA
    includes are available. The test is also skipped when looking up the
    include directory raises ``FileNotFoundError``.
    """
    reason = "CUDA include dir not available on this system"
    try:
        include_dir = libs.get_cuda_include_dir()
    except FileNotFoundError:
        return unittest.skip(reason)(fn)

    # isfile() is already False for a missing path.
    header_present = os.path.isfile(os.path.join(include_dir, "cuda.h"))
    return unittest.skipUnless(header_present, reason)(fn)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def skip_if_curand_kernel_missing(fn):
    """Skip ``fn`` when ``curand_kernel.h`` is not in the CUDA include dir.

    The test is also skipped when looking up the include directory raises
    ``FileNotFoundError``.
    """
    reason = "curand_kernel.h not available on this system"
    try:
        include_dir = libs.get_cuda_include_dir()
    except FileNotFoundError:
        return unittest.skip(reason)(fn)

    # isfile() is already False for a missing path.
    header_present = os.path.isfile(
        os.path.join(include_dir, "curand_kernel.h")
    )
    return unittest.skipUnless(header_present, reason)(fn)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def skip_if_mvc_enabled(reason):
    """Return a decorator that skips a test if Minor Version Compatibility is enabled.

    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    mvc_enabled = config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
    return unittest.skipIf(mvc_enabled, reason)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def cc_X_or_above(major, minor):
    """Tell whether the current device's compute capability is >= (major, minor).

    Always returns True when the CUDA simulator is enabled; otherwise the
    compute capability of the current context's device is compared.
    """
    if config.ENABLE_CUDASIM:
        return True
    device_cc = devices.get_context().device.compute_capability
    return device_cc >= (major, minor)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def skip_unless_cc_50(fn):
    """Skip ``fn`` unless ``cc_X_or_above(5, 0)`` holds."""
    decorate = unittest.skipUnless(cc_X_or_above(5, 0), "requires cc >= 5.0")
    return decorate(fn)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def skip_unless_cc_53(fn):
    """Skip ``fn`` unless ``cc_X_or_above(5, 3)`` holds."""
    decorate = unittest.skipUnless(cc_X_or_above(5, 3), "requires cc >= 5.3")
    return decorate(fn)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def skip_unless_cc_60(fn):
    """Skip ``fn`` unless ``cc_X_or_above(6, 0)`` holds."""
    decorate = unittest.skipUnless(cc_X_or_above(6, 0), "requires cc >= 6.0")
    return decorate(fn)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def skip_unless_cc_75(fn):
    """Skip ``fn`` unless ``cc_X_or_above(7, 5)`` holds."""
    decorate = unittest.skipUnless(cc_X_or_above(7, 5), "requires cc >= 7.5")
    return decorate(fn)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def xfail_unless_cudasim(fn):
    """Mark ``fn`` as an expected failure unless the CUDA simulator is enabled.

    With the simulator enabled, ``fn`` is returned unchanged.
    """
    return fn if config.ENABLE_CUDASIM else unittest.expectedFailure(fn)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def cudadevrt_missing():
    """Report whether the static ``cudadevrt`` library cannot be found.

    Returns False when the CUDA simulator is enabled. Otherwise returns True
    if locating or checking the static library raises
    ``FileNotFoundError``, and False if both steps succeed.
    """
    if config.ENABLE_CUDASIM:
        return False
    try:
        libs.check_static_lib(libs.get_cudalib("cudadevrt", static=True))
    except FileNotFoundError:
        return True
    else:
        return False
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def skip_if_cudadevrt_missing(fn):
    """Skip ``fn`` when ``cudadevrt_missing()`` reports the library as missing."""
    decorate = unittest.skipIf(cudadevrt_missing(), "cudadevrt missing")
    return decorate(fn)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def skip_if_nvjitlink_missing(reason):
    """Return a decorator that skips a test when nvjitlink is unavailable.

    Availability is whatever ``driver._have_nvjitlink()`` reports.
    ``reason`` must be a string; it is used as the skip message.
    """
    assert isinstance(reason, str)
    nvjitlink_available = driver._have_nvjitlink()
    return unittest.skipUnless(nvjitlink_available, reason)
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class ForeignArray:
    """
    Stand-in for an array that originates from another library and is only
    reachable through the CUDA Array Interface. It wraps a DeviceNDArray so
    that the wrapped object no longer looks like a DeviceNDArray.
    """

    def __init__(self, arr):
        # Keep a reference to the wrapped array and re-export only its
        # interface description.
        interface = arr.__cuda_array_interface__
        self._arr = arr
        self.__cuda_array_interface__ = interface
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from fnmatch import fnmatch
|
|
5
|
+
import unittest
|
|
6
|
+
from numba import cuda
|
|
7
|
+
from os.path import dirname, isfile, join, normpath, relpath, splitext
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
import traceback
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Copied and modified from numba/testing/__init__.py, to handle the difference
|
|
15
|
+
# between the top dirs for Numba and the CUDA target
|
|
16
|
+
def load_testsuite(loader, dir):
    """Build a test suite from the tests found directly in ``dir``.

    Files named ``test_*.py`` are loaded by dotted module name, computed
    relative to the directory four levels above this file. Sub-directories
    containing an ``__init__.py`` are handed to ``loader.discover``. Any
    exception is printed to stderr and the process exits with status -1.
    """
    top_level_dir = __file__
    for _ in range(4):
        top_level_dir = dirname(top_level_dir)

    try:
        collected = unittest.TestSuite()
        module_files = []
        for entry in os.listdir(dir):
            entry_path = join(dir, entry)
            if isfile(entry_path) and fnmatch(entry, "test_*.py"):
                module_files.append(entry)
                continue
            if isfile(join(entry_path, "__init__.py")):
                collected.addTests(
                    loader.discover(entry_path, top_level_dir=top_level_dir)
                )

        for filename in module_files:
            # Express the file relative to the top-level dir and turn it
            # into a module name. Unlike Numba's own implementation, the
            # top-level dir here is the numba_cuda module location.
            relative = relpath(join(dir, filename), top_level_dir)
            dotted = splitext(normpath(relative.replace(os.path.sep, ".")))[0]
            collected.addTests(loader.loadTestsFromName(dotted))
        return collected
    except Exception:
        traceback.print_exc(file=sys.stderr)
        sys.exit(-1)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def load_tests(loader, tests, pattern):
    """Assemble the CUDA test suite (``load_tests`` protocol hook).

    The ``nocuda`` tests are always added. The ``cudasim`` tests are added
    when ``cuda.is_available()`` is true, and the remaining directories only
    when the first listed GPU has compute capability >= (2, 0). A message
    is printed whenever a group of tests is left out.
    """
    suite = unittest.TestSuite()
    this_dir = dirname(__file__)

    def add_directory(name):
        suite.addTests(load_testsuite(loader, join(this_dir, name)))

    add_directory("nocuda")
    if not cuda.is_available():
        print("skipped CUDA tests")
        return suite

    add_directory("cudasim")
    gpus = cuda.list_devices()
    if not (gpus and gpus[0].compute_capability >= (2, 0)):
        print("skipped CUDA tests because GPU CC < 2.0")
        return suite

    for name in ("cudadrv", "cudapy", "nrt", "doc_examples"):
        add_directory(name)
    return suite
|
|
File without changes
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import string
|
|
5
|
+
from numba import cuda
|
|
6
|
+
from numba.cuda.core import config
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pytest
|
|
9
|
+
from pytest import param
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Module-wide pytest mark: skip every test in this file when
# config.ENABLE_CUDASIM is set.
pytestmark = pytest.mark.skipif(
    condition=config.ENABLE_CUDASIM,
    reason="no reason to run benchmarks in the simulator",
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Each parameter is a zero-argument factory, so the array is only created
# (and torch / cupy only imported, or the case skipped) when the test runs.
@pytest.mark.parametrize(
    "array_func",
    [
        param(
            lambda: cuda.device_array(128, dtype=np.float32),
            id="device_array",
        ),
        param(
            lambda: pytest.importorskip("torch").empty(
                (128,),
                dtype=pytest.importorskip("torch").float32,
                device="cuda:0",
            ),
            id="torch",
        ),
        param(
            lambda: pytest.importorskip("cupy").empty(128, dtype=np.float32),
            id="cupy",
        ),
    ],
)
def test_one_arg(benchmark, array_func):
    """Benchmark 100 launches of an empty kernel taking one float32 array."""

    # The kernel body is empty, so only the launch itself does any work.
    @cuda.jit("void(float32[:])")
    def one_arg(arr1):
        return

    # The unit of work handed to the benchmark fixture: 100 back-to-back calls.
    def bench(func, arr):
        for _ in range(100):
            func(arr)

    # The kernel is configured once with [128, 128] before being benchmarked.
    benchmark(bench, one_arg[128, 128], array_func())
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Each factory builds one array per lowercase ASCII letter (26 in total),
# matching the 26 parameters a..z of the kernel defined in the test.
@pytest.mark.parametrize(
    "array_func",
    [
        param(
            lambda: [
                cuda.device_array(128, dtype=np.float32)
                for _ in range(len(string.ascii_lowercase))
            ],
            id="device_array",
        ),
        param(
            lambda: [
                pytest.importorskip("torch").empty(
                    (128,),
                    dtype=pytest.importorskip("torch").float32,
                    device="cuda:0",
                )
                for _ in range(len(string.ascii_lowercase))
            ],
            id="torch",
        ),
        param(
            lambda: [
                pytest.importorskip("cupy").empty(128, dtype=np.float32)
                for _ in range(len(string.ascii_lowercase))
            ],
            id="cupy",
        ),
    ],
)
def test_many_args(benchmark, array_func):
    """Benchmark 100 launches of an empty kernel taking 26 float32 arrays."""
    many_arrs = array_func()

    # The signature string repeats "float32[:]" once per array created above.
    @cuda.jit("void({})".format(", ".join(["float32[:]"] * len(many_arrs))))
    def many_args(
        a,
        b,
        c,
        d,
        e,
        f,
        g,
        h,
        i,
        j,
        k,
        l,
        m,
        n,
        o,
        p,
        q,
        r,
        s,
        t,
        u,
        v,
        w,
        x,
        y,
        z,
    ):
        return

    # The unit of work handed to the benchmark fixture: 100 back-to-back calls.
    def bench(func, *arrs):
        for _ in range(100):
            func(*arrs)

    # The kernel is configured once with [128, 128] before being benchmarked.
    benchmark(bench, many_args[128, 128], *many_arrs)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
# Expected to run this module as __main__
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Cloudpickle will think this is a dynamic class when this module is __main__
|
|
8
|
+
class Klass:
    # Class-level attribute, initially None.
    classvar = None
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Separate module with function samples for serialization tests,
|
|
6
|
+
to avoid issues with __main__.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
from math import sqrt
|
|
11
|
+
import numpy as np
|
|
12
|
+
import numpy.random as nprand
|
|
13
|
+
|
|
14
|
+
# This does not need a guard, it's already guarded at the import site
|
|
15
|
+
from numba import jit
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Addition sample decorated with an explicit "int32(int32, int32)" signature.
@jit("int32(int32, int32)")
def add_with_sig(a, b):
    return a + b
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Addition sample decorated with a bare @jit (no signature, no options).
@jit
def add_without_sig(a, b):
    return a + b
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Addition sample decorated with nopython=True.
@jit(nopython=True)
def add_nopython(a, b):
    return a + b
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Like add_nopython, but the body also calls object(). Going by the function
# name, this is presumably meant to make nopython compilation fail.
@jit(nopython=True)
def add_nopython_fail(a, b):
    object()
    return a + b
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Returns a jitted inner function that closes over the argument `a`.
def closure(a):
    @jit(nopython=True)
    def inner(b, c):
        # `a` comes from the enclosing call, `b` and `c` from the caller.
        return a + b + c

    return inner
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Module-level constant read by closure_with_globals and get_global_objmode.
K = 3.0
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Returns a jitted inner function that closes over `x` and also uses module
# globals; `jit_args` are forwarded unchanged to the jit decorator.
def closure_with_globals(x, **jit_args):
    @jit(**jit_args)
    def inner(y):
        # Exercise a builtin function and a module-level constant
        k = max(K, K + 1)
        # Exercise two functions from another module, one accessed with
        # dotted notation, one imported explicitly.
        return math.hypot(x, y) + sqrt(k)

    return inner
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Module-level jitted helper; called by the closure returned from
# closure_calling_other_function.
@jit(nopython=True)
def other_function(x, y):
    return math.hypot(x, y)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Sample decorated with forceobj=True that reads the module-level constant K.
@jit(forceobj=True)
def get_global_objmode(x):
    return K * x
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Sample that uses modules imported under aliases (np, nprand).
@jit(nopython=True)
def get_renamed_module(x):
    # Seed first so that the random value returned below is repeatable.
    nprand.seed(42)
    return np.cos(x), nprand.random()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Returns a jitted inner function that closes over `x` and calls the
# module-level jitted function other_function.
def closure_calling_other_function(x):
    @jit(nopython=True)
    def inner(y, z):
        return other_function(x, y) + z

    return inner
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Returns a jitted inner function that closes over `x` and calls a second
# jitted closure defined in the same enclosing call.
def closure_calling_other_closure(x):
    @jit(nopython=True)
    def other_inner(y):
        return math.hypot(x, y)

    @jit(nopython=True)
    def inner(y):
        return other_inner(y) + x

    return inner
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# A dynamic function calling a builtin function
|
|
99
|
+
# Builds `dyn_func` from source text with exec() and returns it wrapped by
# jit(**jit_args), so the function is created dynamically instead of being
# defined by a regular `def` in this module.
def _get_dyn_func(**jit_args):
    code = """
        def dyn_func(x):
            res = 0
            for i in range(x):
                res += x
            return res
        """
    # Execute the source in a fresh namespace and pick the function out of it.
    ns = {}
    exec(code.strip(), ns)
    return jit(**jit_args)(ns["dyn_func"])
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Two module-level instances of the dynamically created function, one built
# with nopython=True and one with forceobj=True.
dyn_func = _get_dyn_func(nopython=True)
dyn_func_objmode = _get_dyn_func(forceobj=True)
|