numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from numba import cuda
|
|
9
|
+
from numba.cuda.testing import CUDATestCase, skip_unless_cudasim
|
|
10
|
+
import numba.cuda.simulator as simulator
|
|
11
|
+
import unittest
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestCudaSimIssues(CUDATestCase):
    """Regression tests covering record access from kernels, use of the
    ``cuda`` module inside a device function, and kernel-thread cleanup
    after an exception in the simulator."""

    def test_record_access(self):
        """Write scalar-indexed and nested record fields from a kernel and
        check the values on the host afterwards."""
        backyard_type = [
            ("statue", np.float64),
            ("newspaper", np.float64, (6,)),
        ]

        goose_type = [
            ("garden", np.float64, (12,)),
            ("town", np.float64, (42,)),
            ("backyard", backyard_type),
        ]

        goose_np_type = np.dtype(goose_type, align=True)

        @cuda.jit
        def simple_kernel(f):
            f.garden[0] = 45.0
            f.backyard.newspaper[3] = 2.0
            # Read-modify-write of the nested element: 2.0 + 3.0 == 5.0.
            f.backyard.newspaper[3] = f.backyard.newspaper[3] + 3.0

        item = np.recarray(1, dtype=goose_np_type)
        # A single record (not the array) is passed to the kernel.
        simple_kernel[1, 1](item[0])
        np.testing.assert_equal(item[0]["garden"][0], 45)
        np.testing.assert_equal(item[0]["backyard"]["newspaper"][3], 5)

    def test_recarray_setting(self):
        """Assign one whole record to another inside a kernel."""
        recordwith2darray = np.dtype(
            [("i", np.int32), ("j", np.float32, (3, 2))]
        )
        rec = np.recarray(2, dtype=recordwith2darray)
        rec[0]["i"] = 45

        @cuda.jit
        def simple_kernel(f):
            f[1] = f[0]

        simple_kernel[1, 1](rec)
        # Only field "i" was given a defined value, so only it is compared.
        np.testing.assert_equal(rec[0]["i"], rec[1]["i"])

    def test_cuda_module_in_device_function(self):
        """
        Discovered in https://github.com/numba/numba/issues/1837.
        When the `cuda` module is referenced in a device function,
        it does not have the kernel API (e.g. cuda.threadIdx, cuda.shared)
        """
        from numba.cuda.tests.cudasim import support

        inner = support.cuda_module_in_device_function

        @cuda.jit
        def outer(out):
            tid = inner()
            if tid < out.size:
                out[tid] = tid

        arr = np.zeros(10, dtype=np.int32)
        # 11 threads for 10 elements: the bounds check in `outer` must hold
        # the extra thread back.
        outer[1, 11](arr)
        expected = np.arange(arr.size, dtype=np.int32)
        np.testing.assert_equal(expected, arr)

    @skip_unless_cudasim("Only works on CUDASIM")
    def test_deadlock_on_exception(self):
        """Check that no simulator ``BlockThread`` stays alive after a
        kernel launch, both when it succeeds and when it raises."""

        def assert_no_blockthreads():
            # Threads that are still running after being given time to exit.
            blockthreads = []
            for t in threading.enumerate():
                if not isinstance(t, simulator.kernel.BlockThread):
                    continue

                # join blockthreads with a short timeout to allow aborted
                # threads to exit
                t.join(1)
                if t.is_alive():
                    blockthreads.append(t)

            # Previously the list was never populated, which made this
            # assertion vacuous; it now reports every stuck thread at once.
            self.assertListEqual(
                blockthreads, [], "Blocked kernel thread(s) still alive"
            )

        @simulator.jit
        def assign_with_sync(x, y):
            i = cuda.grid(1)
            y[i] = x[i]

            cuda.syncthreads()
            cuda.syncthreads()

        x = np.arange(3)
        y = np.empty(3)
        assign_with_sync[1, 3](x, y)
        np.testing.assert_array_equal(x, y)
        assert_no_blockthreads()

        # Six threads over three-element arrays: the launch is expected to
        # raise IndexError, and no thread may be left waiting afterwards.
        with self.assertRaises(IndexError):
            assign_with_sync[1, 6](x, y)
        assert_no_blockthreads()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include <cooperative_groups.h>
|
|
7
|
+
#include <cuda/barrier>
|
|
8
|
+
|
|
9
|
+
namespace cg = cooperative_groups;
|
|
10
|
+
|
|
11
|
+
// Arrive at the given block-scoped barrier, then wait on the token that the
// arrival returned.
__device__ void _wait_on_tile(cuda::barrier<cuda::thread_scope_block> &tile)
{
    // The token returned by arrive() is a temporary, so it is handed to
    // wait() as an rvalue without an intermediate named variable.
    tile.wait(tile.arrive());
}
|
|
16
|
+
|
|
17
|
+
// Synchronize the calling thread block on a block-scope cuda::barrier held
// in shared memory.
//
// `ret` is never written and the function always returns 0.
// NOTE(review): presumably `ret` is the return-value slot required by the
// caller's calling convention - confirm against the Python-side declaration.
// Only threadIdx.x and blockDim.x are consulted, so a 1-D thread block is
// assumed.
extern "C"
__device__ int cta_barrier(int *ret) {
    auto cta = cg::this_thread_block();
    // NOTE(review): `tile` is not used below; kept as in the original,
    // presumably so the fixture still exercises tiled_partition - confirm.
    cg::thread_block_tile<32> tile = cg::tiled_partition<32>(cta);
    __shared__ cuda::barrier<cuda::thread_scope_block> barrier;
    if (threadIdx.x == 0) {
        // One expected arrival per thread along x.
        init(&barrier, blockDim.x);
    }
    // The barrier must be fully initialized before any other thread
    // arrives on it; without this block-wide sync, threads other than
    // thread 0 could race with init().
    cta.sync();

    _wait_on_tile(barrier);
    return 0;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Not all CUDA includes are safe to include in device code compiled by NVRTC,
|
|
7
|
+
// because it does not have paths to all system include directories. Headers
|
|
8
|
+
// such as cuda_device_runtime_api.h are safe to use in NVRTC without adding
|
|
9
|
+
// additional includes.
|
|
10
|
+
#include <cuda_device_runtime_api.h>
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
extern "C" __device__
|
|
7
|
+
int bar(int* out, int a) {
|
|
8
|
+
// Explicitly placed to generate an error
|
|
9
|
+
SYNTAX ERROR
|
|
10
|
+
*out = a * 2;
|
|
11
|
+
return 0;
|
|
12
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Compile with:
|
|
7
|
+
//
|
|
8
|
+
// nvcc -gencode arch=compute_50,code=compute_50 -rdc true -ptx jitlink.cu
|
|
9
|
+
//
|
|
10
|
+
// using the oldest supported toolkit version (10.2 at the time of writing).
|
|
11
|
+
|
|
12
|
+
extern "C" __device__
|
|
13
|
+
int bar(int *out, int a)
|
|
14
|
+
{
|
|
15
|
+
*out = a * 2;
|
|
16
|
+
return 0;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
// The out argument is necessary due to Numba's CUDA calling convention, which
|
|
21
|
+
// always reserves the first parameter for a pointer to a returned value, even
|
|
22
|
+
// if there is no return value.
|
|
23
|
+
extern "C" __device__
|
|
24
|
+
int array_mutator(void *out, int *a)
|
|
25
|
+
{
|
|
26
|
+
a[0] = a[1];
|
|
27
|
+
return 0;
|
|
28
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Generated by NVIDIA NVVM Compiler
|
|
3
|
+
//
|
|
4
|
+
// Compiler Build ID: CL-27506705
|
|
5
|
+
// Cuda compilation tools, release 10.2, V10.2.89
|
|
6
|
+
// Based on LLVM 3.4svn
|
|
7
|
+
//
|
|
8
|
+
|
|
9
|
+
.version 6.5
|
|
10
|
+
.target sm_50
|
|
11
|
+
.address_size 64
|
|
12
|
+
|
|
13
|
+
// .globl bar
|
|
14
|
+
|
|
15
|
+
.visible .func (.param .b32 func_retval0) bar(
|
|
16
|
+
.param .b64 bar_param_0,
|
|
17
|
+
.param .b32 bar_param_1
|
|
18
|
+
)
|
|
19
|
+
{
|
|
20
|
+
.reg .b32 %r<4>;
|
|
21
|
+
.reg .b64 %rd<2>;
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
ld.param.u64 %rd1, [bar_param_0];
|
|
25
|
+
ld.param.u32 %r1, [bar_param_1];
|
|
26
|
+
shl.b32 %r2, %r1, 1;
|
|
27
|
+
st.u32 [%rd1], %r2;
|
|
28
|
+
mov.u32 %r3, 0;
|
|
29
|
+
st.param.b32 [func_retval0+0], %r3;
|
|
30
|
+
ret;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// .globl array_mutator
|
|
34
|
+
.visible .func (.param .b32 func_retval0) array_mutator(
|
|
35
|
+
.param .b64 array_mutator_param_0,
|
|
36
|
+
.param .b64 array_mutator_param_1
|
|
37
|
+
)
|
|
38
|
+
{
|
|
39
|
+
.reg .b32 %r<3>;
|
|
40
|
+
.reg .b64 %rd<2>;
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
ld.param.u64 %rd1, [array_mutator_param_1];
|
|
44
|
+
ld.u32 %r1, [%rd1+4];
|
|
45
|
+
st.u32 [%rd1], %r1;
|
|
46
|
+
mov.u32 %r2, 0;
|
|
47
|
+
st.param.b32 [func_retval0+0], %r2;
|
|
48
|
+
ret;
|
|
49
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
extern "C" __device__
|
|
7
|
+
int bar(int* out, int a) {
|
|
8
|
+
// Explicitly placed to generate a warning for testing the NVRTC program log
|
|
9
|
+
int unused;
|
|
10
|
+
*out = a * 2;
|
|
11
|
+
return 0;
|
|
12
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.tests import load_testsuite
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_tests(loader, tests, pattern):
    """Build this package's test suite for the unittest ``load_tests`` hook.

    Only ``loader`` is used: it is handed to ``load_testsuite`` together
    with the directory that contains this file.  ``tests`` and ``pattern``
    are accepted but ignored.
    """
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// magictoken.ex_mul_f32_f32.begin
|
|
7
|
+
// Foreign function example: multiplication of a pair of floats
|
|
8
|
+
|
|
9
|
+
extern "C" __device__ int
|
|
10
|
+
mul_f32_f32(
|
|
11
|
+
float* return_value,
|
|
12
|
+
float x,
|
|
13
|
+
float y)
|
|
14
|
+
{
|
|
15
|
+
// Compute result and store in caller-provided slot
|
|
16
|
+
*return_value = x * y;
|
|
17
|
+
|
|
18
|
+
// Signal that no Python exception occurred
|
|
19
|
+
return 0;
|
|
20
|
+
}
|
|
21
|
+
// magictoken.ex_mul_f32_f32.end
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
// magictoken.ex_sum_reduce_proto.begin
|
|
25
|
+
extern "C"
|
|
26
|
+
__device__ int
|
|
27
|
+
sum_reduce(
|
|
28
|
+
float* return_value,
|
|
29
|
+
float* array,
|
|
30
|
+
int n
|
|
31
|
+
);
|
|
32
|
+
// magictoken.ex_sum_reduce_proto.end
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
// Performs a simple reduction on an array passed by pointer using the
|
|
36
|
+
// ffi.from_buffer() method. Implements the prototype above.
|
|
37
|
+
extern "C"
__device__ int
sum_reduce(
  float* return_value,
  float* array,
  int n
)
{
  // Sums the first n elements of array and stores the result, narrowed to
  // float, in *return_value. Always returns 0. A non-positive n yields 0.0f.

  // Accumulate in double; the total is converted to float only once, on
  // the final store.
  double sum = 0.0;

  // The index is a signed int to match n. Comparing a size_t index with
  // the int n would convert a negative n to a huge unsigned bound and
  // read far past the end of array.
  for (int i = 0; i < n; ++i) {
    sum += array[i];
  }

  *return_value = (float)sum;

  return 0;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Templated multiplication function: mymul
|
|
7
|
+
template <typename T>
|
|
8
|
+
__device__ T mymul(T a, T b) { return a * b; }
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include <add.cuh> // In numba/cuda/tests/data/include
|
|
7
|
+
#include <mul.cuh> // In numba/cuda/tests/doc_examples/ffi/include
|
|
8
|
+
|
|
9
|
+
extern "C"
|
|
10
|
+
__device__ int saxpy(float *ret, float a, float x, float y)
|
|
11
|
+
{
|
|
12
|
+
*ret = myadd(mymul(a, x), y);
|
|
13
|
+
return 0;
|
|
14
|
+
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
# Contents in this file are referenced from the sphinx-generated docs.
|
|
5
|
+
# "magictoken" is used for markers as beginning and ending of example text.
|
|
6
|
+
|
|
7
|
+
import unittest
|
|
8
|
+
from numba.cuda.testing import (
|
|
9
|
+
CUDATestCase,
|
|
10
|
+
skip_on_cudasim,
|
|
11
|
+
skip_if_cudadevrt_missing,
|
|
12
|
+
skip_unless_cc_60,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@skip_if_cudadevrt_missing
|
|
17
|
+
@skip_unless_cc_60
|
|
18
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
|
19
|
+
class TestCooperativeGroups(CUDATestCase):
|
|
20
|
+
def test_ex_grid_sync(self):
|
|
21
|
+
# magictoken.ex_grid_sync_kernel.begin
|
|
22
|
+
from numba import cuda
|
|
23
|
+
from numba.cuda import int32
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
sig = (int32[:, ::1],)
|
|
27
|
+
|
|
28
|
+
@cuda.jit(sig)
|
|
29
|
+
def sequential_rows(M):
|
|
30
|
+
col = cuda.grid(1)
|
|
31
|
+
g = cuda.cg.this_grid()
|
|
32
|
+
|
|
33
|
+
rows = M.shape[0]
|
|
34
|
+
cols = M.shape[1]
|
|
35
|
+
|
|
36
|
+
for row in range(1, rows):
|
|
37
|
+
opposite = cols - col - 1
|
|
38
|
+
# Each row's elements are one greater than the previous row
|
|
39
|
+
M[row, col] = M[row - 1, opposite] + 1
|
|
40
|
+
# Wait until all threads have written their column element,
|
|
41
|
+
# and that the write is visible to all other threads
|
|
42
|
+
g.sync()
|
|
43
|
+
|
|
44
|
+
# magictoken.ex_grid_sync_kernel.end
|
|
45
|
+
|
|
46
|
+
# magictoken.ex_grid_sync_data.begin
|
|
47
|
+
# Empty input data
|
|
48
|
+
A = np.zeros((1024, 1024), dtype=np.int32)
|
|
49
|
+
# A somewhat arbitrary choice (one warp), but generally smaller block sizes
|
|
50
|
+
# allow more blocks to be launched (noting that other limitations on
|
|
51
|
+
# occupancy apply such as shared memory size)
|
|
52
|
+
blockdim = 32
|
|
53
|
+
griddim = A.shape[1] // blockdim
|
|
54
|
+
# magictoken.ex_grid_sync_data.end
|
|
55
|
+
|
|
56
|
+
# Skip this test if the grid size used in the example is too large for
|
|
57
|
+
# a cooperative launch on the current GPU
|
|
58
|
+
mb = sequential_rows.overloads[sig].max_cooperative_grid_blocks(
|
|
59
|
+
blockdim
|
|
60
|
+
)
|
|
61
|
+
if mb < griddim:
|
|
62
|
+
self.skipTest("Device does not support a large enough coop grid")
|
|
63
|
+
|
|
64
|
+
# magictoken.ex_grid_sync_launch.begin
|
|
65
|
+
# Kernel launch - this is implicitly a cooperative launch
|
|
66
|
+
sequential_rows[griddim, blockdim](A)
|
|
67
|
+
|
|
68
|
+
# What do the results look like?
|
|
69
|
+
# print(A)
|
|
70
|
+
#
|
|
71
|
+
# [[ 0 0 0 ... 0 0 0]
|
|
72
|
+
# [ 1 1 1 ... 1 1 1]
|
|
73
|
+
# [ 2 2 2 ... 2 2 2]
|
|
74
|
+
# ...
|
|
75
|
+
# [1021 1021 1021 ... 1021 1021 1021]
|
|
76
|
+
# [1022 1022 1022 ... 1022 1022 1022]
|
|
77
|
+
# [1023 1023 1023 ... 1023 1023 1023]]
|
|
78
|
+
# magictoken.ex_grid_sync_launch.end
|
|
79
|
+
|
|
80
|
+
# Sanity check - are the results what we expect?
|
|
81
|
+
reference = np.tile(np.arange(1024), (1024, 1)).T
|
|
82
|
+
np.testing.assert_equal(A, reference)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
if __name__ == "__main__":
|
|
86
|
+
unittest.main()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
7
|
+
from numba.cuda.tests.support import captured_stdout
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestCPointer(CUDATestCase):
    """
    Test launching a kernel whose signature takes a CPointer argument
    """

    def setUp(self):
        # Prevent output from this test showing
        # up when running the test suite
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        super().setUp()

    def tearDown(self):
        # No exception type, value, or traceback
        self._captured_stdout.__exit__(None, None, None)
        super().tearDown()

    def test_ex_cpointer(self):
        # ex_cpointer.sig.begin
        import numpy as np
        from numba import cuda
        from numba.cuda import types

        # The first kernel argument is a pointer to a uint8 array.
        # The second argument holds the length as a uint32.
        # The return type of a kernel is always void.
        sig = types.void(types.CPointer(types.uint8), types.uint32)
        # ex_cpointer.sig.end

        # ex_cpointer.kernel.begin
        @cuda.jit(sig)
        def add_one(x, n):
            i = cuda.grid(1)
            if i < n:
                x[i] += 1

        # ex_cpointer.kernel.end

        # ex_cpointer.launch.begin
        x = cuda.to_device(np.arange(10, dtype=np.uint8))

        # Print initial values of x
        print(x.copy_to_host())  # [0 1 2 3 4 5 6 7 8 9]

        # Obtain a pointer to the data from the CUDA Array Interface
        x_ptr = x.__cuda_array_interface__["data"][0]
        x_len = len(x)

        # Launch the kernel with the pointer and length
        add_one[1, 32](x_ptr, x_len)

        # Demonstrate that the data was updated by the kernel
        print(x.copy_to_host())  # [ 1 2 3 4 5 6 7 8 9 10]
        # ex_cpointer.launch.end

        # Sanity check - every element should have been incremented once.
        # Placed after the launch.end marker so it stays out of the
        # documentation excerpt.
        np.testing.assert_equal(
            x.copy_to_host(), np.arange(1, 11, dtype=np.uint8)
        )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
if __name__ == "__main__":
|
|
68
|
+
unittest.main()
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from numba.cuda.testing import (
|
|
7
|
+
CUDATestCase,
|
|
8
|
+
skip_on_cudasim,
|
|
9
|
+
skip_on_standalone_numba_cuda,
|
|
10
|
+
)
|
|
11
|
+
from numba.cuda.tests.support import captured_stdout
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
|
16
|
+
class TestCpuGpuCompat(CUDATestCase):
|
|
17
|
+
"""
|
|
18
|
+
Test compatibility of CPU and GPU functions
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def setUp(self):
|
|
22
|
+
# Prevent output from this test showing up when running the test suite
|
|
23
|
+
self._captured_stdout = captured_stdout()
|
|
24
|
+
self._captured_stdout.__enter__()
|
|
25
|
+
super().setUp()
|
|
26
|
+
|
|
27
|
+
def tearDown(self):
|
|
28
|
+
# No exception type, value, or traceback
|
|
29
|
+
self._captured_stdout.__exit__(None, None, None)
|
|
30
|
+
super().tearDown()
|
|
31
|
+
|
|
32
|
+
@skip_on_standalone_numba_cuda
|
|
33
|
+
def test_ex_cpu_gpu_compat(self):
|
|
34
|
+
# ex_cpu_gpu_compat.import.begin
|
|
35
|
+
from math import pi
|
|
36
|
+
|
|
37
|
+
import numba
|
|
38
|
+
from numba import cuda
|
|
39
|
+
# ex_cpu_gpu_compat.import.end
|
|
40
|
+
|
|
41
|
+
# ex_cpu_gpu_compat.allocate.begin
|
|
42
|
+
X = cuda.to_device([1, 10, 234])
|
|
43
|
+
Y = cuda.to_device([2, 2, 4014])
|
|
44
|
+
Z = cuda.to_device([3, 14, 2211])
|
|
45
|
+
results = cuda.to_device([0.0, 0.0, 0.0])
|
|
46
|
+
# ex_cpu_gpu_compat.allocate.end
|
|
47
|
+
|
|
48
|
+
# ex_cpu_gpu_compat.define.begin
|
|
49
|
+
@numba.jit
|
|
50
|
+
def business_logic(x, y, z):
|
|
51
|
+
return 4 * z * (2 * x - (4 * y) / 2 * pi)
|
|
52
|
+
|
|
53
|
+
# ex_cpu_gpu_compat.define.end
|
|
54
|
+
|
|
55
|
+
# ex_cpu_gpu_compat.cpurun.begin
|
|
56
|
+
print(business_logic(1, 2, 3)) # -126.79644737231007
|
|
57
|
+
# ex_cpu_gpu_compat.cpurun.end
|
|
58
|
+
|
|
59
|
+
# ex_cpu_gpu_compat.usegpu.begin
|
|
60
|
+
@cuda.jit
|
|
61
|
+
def f(res, xarr, yarr, zarr):
|
|
62
|
+
tid = cuda.grid(1)
|
|
63
|
+
if tid < len(xarr):
|
|
64
|
+
# The function decorated with numba.jit may be directly reused
|
|
65
|
+
res[tid] = business_logic(xarr[tid], yarr[tid], zarr[tid])
|
|
66
|
+
|
|
67
|
+
# ex_cpu_gpu_compat.usegpu.end
|
|
68
|
+
|
|
69
|
+
# ex_cpu_gpu_compat.launch.begin
|
|
70
|
+
f.forall(len(X))(results, X, Y, Z)
|
|
71
|
+
print(results)
|
|
72
|
+
# [-126.79644737231007, 416.28324559588634, -218912930.2987788]
|
|
73
|
+
# ex_cpu_gpu_compat.launch.end
|
|
74
|
+
|
|
75
|
+
expect = [business_logic(x, y, z) for x, y, z in zip(X, Y, Z)]
|
|
76
|
+
|
|
77
|
+
np.testing.assert_equal(expect, results.copy_to_host())
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__":
|
|
81
|
+
unittest.main()
|