numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba import cuda
|
|
5
|
+
from numba.cuda.testing import CUDATestCase
|
|
6
|
+
import numpy as np
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class UseCase:
    """
    Provide a way to call a kernel as if it were a function.

    This allows the CUDA cache tests to closely match the CPU cache tests, and
    also to support calling cache use cases as njitted functions. The class
    wraps a function that takes an array for the return value and arguments,
    and provides an interface that accepts arguments, launches the kernel
    appropriately, and returns the stored return value.

    The return type is inferred from the type of the first argument, unless it
    is explicitly overridden by the ``retty`` kwarg.

    This class does not define ``_call`` itself; ``__call__`` invokes
    ``self._call(ret, *args)``, so a subclass has to supply it.
    """

    def __init__(self, func, retty=None):
        """
        Store the wrapped function and the optional return dtype.

        :param func: the callable to wrap; exposed again through ``func``.
        :param retty: dtype used for the return-value array, or ``None`` to
                      infer it from the first call argument.
        """
        self._func = func
        self._retty = retty

    def __call__(self, *args):
        """
        Convert ``args`` to arrays, run ``_call`` and return the result.

        Every argument goes through ``np.asarray``. The return-value array is
        passed to ``_call`` ahead of the converted arguments, and its content
        is read back with ``[()]`` once ``_call`` has returned.
        """
        array_args = [np.asarray(arg) for arg in args]
        # Compare against None rather than relying on truthiness, so that an
        # explicitly supplied retty whose truth value happens to be false is
        # still honoured instead of being silently replaced by inference.
        if self._retty is not None:
            # Uninitialised zero-dimensional array of the requested dtype.
            array_return = np.ndarray((), dtype=self._retty)
        else:
            # Inference needs at least one argument to copy shape/dtype from.
            array_return = np.zeros_like(array_args[0])

        self._call(array_return, *array_args)
        return array_return[()]

    @property
    def func(self):
        """The wrapped function, exactly as passed to the constructor."""
        return self._func
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class CUDAUseCase(UseCase):
    """Use case whose wrapped function is launched with a ``[1, 1]`` config."""

    def _call(self, ret, *args):
        """Launch the wrapped function, passing ``ret`` ahead of ``args``."""
        configured = self._func[1, 1]
        configured(ret, *args)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@cuda.jit(cache=True)
def add_usecase_kernel(r, x, y):
    """Write ``x[()] + y[()] + Z`` into ``r[()]`` (compiled with caching on)."""
    lhs = x[()]
    rhs = y[()]
    r[()] = lhs + rhs + Z
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@cuda.jit(cache=False)
def add_nocache_usecase_kernel(r, x, y):
    """Write ``x[()] + y[()] + Z`` into ``r[()]`` (compiled with caching off)."""
    lhs = x[()]
    rhs = y[()]
    r[()] = lhs + rhs + Z
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Function-style wrappers around the cached and uncached addition kernels.
add_usecase = CUDAUseCase(func=add_usecase_kernel)
add_nocache_usecase = CUDAUseCase(func=add_nocache_usecase_kernel)

# Module-level global referenced by name inside the kernels in this file.
Z = 1
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Inner / outer cached / uncached cases
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@cuda.jit(cache=True)
def inner(x, y):
    """Return ``x + y + Z``; called from the ``outer*`` kernels in this file."""
    partial = x + y
    return partial + Z
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@cuda.jit(cache=True)
def outer_kernel(r, x, y):
    """Store ``inner(-y[()], x[()])`` into ``r[()]`` (compiled with caching on)."""
    negated_y = -y[()]
    r[()] = inner(negated_y, x[()])
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@cuda.jit(cache=False)
def outer_uncached_kernel(r, x, y):
    """Store ``inner(-y[()], x[()])`` into ``r[()]`` (compiled with caching off)."""
    negated_y = -y[()]
    r[()] = inner(negated_y, x[()])
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Function-style wrappers: a cached and an uncached caller of ``inner``.
outer = CUDAUseCase(func=outer_kernel)
outer_uncached = CUDAUseCase(func=outer_uncached_kernel)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Exercise returning a record instance. This used to hardcode the dtype
|
|
87
|
+
# pointer's value in the bitcode.
|
|
88
|
+
|
|
89
|
+
# Two record dtypes with the same fields; they differ only in ``align``.
packed_record_type = np.dtype([("a", np.int8), ("b", np.float64)])
aligned_record_type = np.dtype([("a", np.int8), ("b", np.float64)], align=True)

# Fill a two-element packed array with a = 1, 2 and b = 42.5, 43.5.
packed_arr = np.empty(shape=2, dtype=packed_record_type)
for i in range(packed_arr.size):
    packed_arr["a"][i] = i + 1
    packed_arr["b"][i] = i + 42.5

# Same records, converted to the aligned dtype.
aligned_arr = packed_arr.astype(aligned_record_type)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@cuda.jit(cache=True)
def record_return(r, ary, i):
    """Copy element ``i`` of ``ary`` into ``r[()]``."""
    selected = ary[i]
    r[()] = selected
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# One kernel, two wrappers: only the return dtype handed to the wrapper differs.
record_return_packed = CUDAUseCase(func=record_return, retty=packed_record_type)
record_return_aligned = CUDAUseCase(
    func=record_return, retty=aligned_record_type
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# Closure test cases
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def make_closure(x):
    """Build a cached kernel that closes over ``x`` and wrap it for calling.

    The kernel stores ``x + y[()]`` into ``r[()]``; the returned object is a
    ``CUDAUseCase`` around that kernel.
    """

    @cuda.jit(cache=True)
    def closure(r, y):
        y_value = y[()]
        r[()] = x + y_value

    wrapped = CUDAUseCase(closure)
    return wrapped
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Four closures that differ only in the captured value.
closure1 = make_closure(x=3)
closure2 = make_closure(x=5)
closure3 = make_closure(x=7)
closure4 = make_closure(x=9)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Ambiguous / renamed functions
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@cuda.jit(cache=True)
def ambiguous_function(r, x):
    """First definition under this name: store ``x[()] + 2`` into ``r[()]``."""
    value = x[()]
    r[()] = value + 2


# Wrap this definition now, before the name is bound to a later definition.
renamed_function1 = CUDAUseCase(func=ambiguous_function)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@cuda.jit(cache=True)
def ambiguous_function(r, x):
    """Second definition of this name: writes ``x[()] + 6`` into ``r``."""
    r[()] = x[()] + 6


# Handle on the second definition of ``ambiguous_function``.
renamed_function2 = CUDAUseCase(ambiguous_function)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@cuda.jit(cache=True)
def many_locals():
    """Declare 24 ``(1, 1)`` float64 local arrays and zero-fill each one."""
    aa = cuda.local.array((1, 1), np.float64)
    ab = cuda.local.array((1, 1), np.float64)
    ac = cuda.local.array((1, 1), np.float64)
    ad = cuda.local.array((1, 1), np.float64)
    ae = cuda.local.array((1, 1), np.float64)
    af = cuda.local.array((1, 1), np.float64)
    ag = cuda.local.array((1, 1), np.float64)
    ah = cuda.local.array((1, 1), np.float64)
    ai = cuda.local.array((1, 1), np.float64)
    aj = cuda.local.array((1, 1), np.float64)
    ak = cuda.local.array((1, 1), np.float64)
    al = cuda.local.array((1, 1), np.float64)
    am = cuda.local.array((1, 1), np.float64)
    an = cuda.local.array((1, 1), np.float64)
    ao = cuda.local.array((1, 1), np.float64)
    ap = cuda.local.array((1, 1), np.float64)
    ar = cuda.local.array((1, 1), np.float64)
    at = cuda.local.array((1, 1), np.float64)
    au = cuda.local.array((1, 1), np.float64)
    av = cuda.local.array((1, 1), np.float64)
    aw = cuda.local.array((1, 1), np.float64)
    ax = cuda.local.array((1, 1), np.float64)
    ay = cuda.local.array((1, 1), np.float64)
    az = cuda.local.array((1, 1), np.float64)

    aa[:] = 0
    ab[:] = 0
    ac[:] = 0
    ad[:] = 0
    ae[:] = 0
    af[:] = 0
    ag[:] = 0
    ah[:] = 0
    ai[:] = 0
    aj[:] = 0
    ak[:] = 0
    al[:] = 0
    am[:] = 0
    an[:] = 0
    ao[:] = 0
    ap[:] = 0
    ar[:] = 0
    at[:] = 0
    au[:] = 0
    av[:] = 0
    aw[:] = 0
    ax[:] = 0
    ay[:] = 0
    az[:] = 0
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Simple use case for multiprocessing test
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
@cuda.jit(cache=True)
def simple_usecase_kernel(r, x):
    """Copy the 0-d input ``x`` into the 0-d output ``r``."""
    r[()] = x[()]


simple_usecase_caller = CUDAUseCase(simple_usecase_kernel)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class _TestModule(CUDATestCase):
    """
    Tests for functionality of this module's functions.
    Note this does not define any "test_*" method, instead check_module()
    should be called by hand.
    """

    def check_module(self, mod):
        """Call the use cases of ``mod`` listed below and check their results."""
        self.assertPreciseEqual(mod.add_usecase(2, 3), 6)
        self.assertPreciseEqual(mod.outer_uncached(3, 2), 2)
        self.assertPreciseEqual(mod.outer(3, 2), 2)

        # Index 1 of both record arrays holds (2, 43.5).
        packed_rec = mod.record_return_packed(mod.packed_arr, 1)
        self.assertPreciseEqual(tuple(packed_rec), (2, 43.5))
        aligned_rec = mod.record_return_aligned(mod.aligned_arr, 1)
        self.assertPreciseEqual(tuple(aligned_rec), (2, 43.5))

        # Only checks that the call completes; the result is not inspected.
        mod.simple_usecase_caller(2)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def self_test():
    """Run ``_TestModule.check_module`` against this module itself."""
    mod = sys.modules[__name__]
    _TestModule().check_module(mod)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from numba import cuda
|
|
7
|
+
from numba.cuda import HAS_NUMBA
|
|
8
|
+
from numba.cuda.testing import CUDATestCase, skip_on_standalone_numba_cuda
|
|
9
|
+
from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase, UseCase
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CPUUseCase(UseCase):
    """UseCase variant whose ``_call`` invokes the wrapped function directly."""

    def _call(self, ret, *args):
        """Call ``self._func`` with ``ret`` followed by ``args``."""
        self._func(ret, *args)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Using the same function as a cached CPU and CUDA-jitted function
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def target_shared_assign(r, x):
    """Copy the 0-d value ``x[()]`` into the 0-d output ``r``."""
    r[()] = x[()]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# CUDA-jitted, cached version of the shared Python function.
assign_cuda_kernel = cuda.jit(cache=True)(target_shared_assign)
assign_cuda = CUDAUseCase(assign_cuda_kernel)
if HAS_NUMBA:
    # The CPU variant is only defined when HAS_NUMBA is true.
    from numba import njit

    assign_cpu_jitted = njit(cache=True)(target_shared_assign)
    assign_cpu = CPUUseCase(assign_cpu_jitted)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@skip_on_standalone_numba_cuda
class _TestModule(CUDATestCase):
    """
    Tests for functionality of this module's functions.
    Note this does not define any "test_*" method, instead check_module()
    should be called by hand.
    """

    def check_module(self, mod):
        """Check that both CPU and CUDA variants return their argument."""
        self.assertPreciseEqual(mod.assign_cpu(5), 5)
        self.assertPreciseEqual(mod.assign_cpu(5.5), 5.5)
        self.assertPreciseEqual(mod.assign_cuda(5), 5)
        self.assertPreciseEqual(mod.assign_cuda(5.5), 5.5)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def self_test():
    """Run ``_TestModule.check_module`` against this module itself."""
    mod = sys.modules[__name__]
    _TestModule().check_module(mod)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba import cuda
|
|
5
|
+
from numba.cuda.testing import CUDATestCase
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Usecase with cooperative groups
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@cuda.jit(cache=True)
def cg_usecase_kernel(r, x):
    """Obtain the grid group and synchronize on it; ``r`` and ``x`` are unused."""
    grid = cuda.cg.this_grid()
    grid.sync()


cg_usecase = CUDAUseCase(cg_usecase_kernel)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class _TestModule(CUDATestCase):
    """
    Tests for functionality of this module's functions.
    Note this does not define any "test_*" method, instead check_module()
    should be called by hand.
    """

    def check_module(self, mod):
        """Call ``mod.cg_usecase``; only successful completion is checked."""
        mod.cg_usecase(0)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def self_test():
    """Run ``_TestModule.check_module`` against this module itself."""
    mod = sys.modules[__name__]
    _TestModule().check_module(mod)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import cmath
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def div_usecase(x, y):
    """Return the true-division quotient ``x / y``."""
    return x / y
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def real_usecase(x):
    """Return the ``real`` attribute of ``x``."""
    return x.real
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def imag_usecase(x):
    """Return the ``imag`` attribute of ``x``."""
    return x.imag
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def conjugate_usecase(x):
    """Return ``x.conjugate()``."""
    return x.conjugate()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def acos_usecase(x):
    """Return ``cmath.acos(x)``."""
    return cmath.acos(x)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def cos_usecase(x):
    """Return ``cmath.cos(x)``."""
    return cmath.cos(x)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def asin_usecase(x):
    """Return ``cmath.asin(x)``."""
    return cmath.asin(x)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def sin_usecase(x):
    """Return ``cmath.sin(x)``."""
    return cmath.sin(x)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def atan_usecase(x):
    """Return ``cmath.atan(x)``."""
    return cmath.atan(x)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def tan_usecase(x):
    """Return ``cmath.tan(x)``."""
    return cmath.tan(x)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def acosh_usecase(x):
    """Return ``cmath.acosh(x)``."""
    return cmath.acosh(x)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def cosh_usecase(x):
    """Return ``cmath.cosh(x)``."""
    return cmath.cosh(x)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def asinh_usecase(x):
    """Return ``cmath.asinh(x)``."""
    return cmath.asinh(x)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def sinh_usecase(x):
    """Return ``cmath.sinh(x)``."""
    return cmath.sinh(x)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def atanh_usecase(x):
    """Return ``cmath.atanh(x)``."""
    return cmath.atanh(x)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def tanh_usecase(x):
    """Return ``cmath.tanh(x)``."""
    return cmath.tanh(x)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def exp_usecase(x):
    """Return ``cmath.exp(x)``."""
    return cmath.exp(x)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def isfinite_usecase(x):
    """Return ``cmath.isfinite(x)``."""
    return cmath.isfinite(x)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def isinf_usecase(x):
    """Return ``cmath.isinf(x)``."""
    return cmath.isinf(x)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def isnan_usecase(x):
    """Return ``cmath.isnan(x)``."""
    return cmath.isnan(x)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def log_usecase(x):
    """Return ``cmath.log(x)`` (single-argument form)."""
    return cmath.log(x)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def log_base_usecase(x, base):
    """Return ``cmath.log(x, base)`` (two-argument form)."""
    return cmath.log(x, base)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def log10_usecase(x):
    """Return ``cmath.log10(x)``."""
    return cmath.log10(x)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def phase_usecase(x):
    """Return ``cmath.phase(x)``."""
    return cmath.phase(x)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def polar_usecase(x):
    """Return the ``(r, phi)`` tuple from ``cmath.polar(x)``."""
    return cmath.polar(x)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def polar_as_complex_usecase(x):
    """Return ``cmath.polar(x)`` packed as ``complex(r, phi)``.

    The pair is stored as real and imaginary parts; it is not converted
    back to rectangular form.
    """
    return complex(*cmath.polar(x))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def rect_usecase(r, phi):
    """Return ``cmath.rect(r, phi)``."""
    return cmath.rect(r, phi)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def sqrt_usecase(x):
    """Return ``cmath.sqrt(x)``."""
    return cmath.sqrt(x)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from enum import Enum, IntEnum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Color(Enum):
    """Plain ``Enum`` with three small integer values."""

    red = 1
    green = 2
    blue = 3
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Shake(Enum):
    """Plain ``Enum`` whose ``mint`` value coincides with ``Color.blue``."""

    vanilla = 7
    chocolate = 4
    cookies = 9
    # Same as Color.blue
    mint = 3
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Planet(Enum):
    """``Enum`` whose member values are 2-tuples of floats."""

    MERCURY = (3.303e23, 2.4397e6)
    VENUS = (4.869e24, 6.0518e6)
    EARTH = (5.976e24, 6.37814e6)
    MARS = (6.421e23, 3.3972e6)
    JUPITER = (1.9e27, 7.1492e7)
    SATURN = (5.688e26, 6.0268e7)
    URANUS = (8.686e25, 2.5559e7)
    NEPTUNE = (1.024e26, 2.4746e7)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class HeterogeneousEnum(Enum):
    """``Enum`` mixing float and complex member values."""

    red = 1.0
    green = 2.0
    blue = 3j
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Shape(IntEnum):
    """``IntEnum`` whose values coincide with members of other enums."""

    # Same as Color.green
    circle = 2
    # Same as RequestError.internal_error
    square = 500
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RequestError(IntEnum):
    """``IntEnum`` with three integer members."""

    dummy = 2
    not_found = 404
    internal_error = 500
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class IntEnumWithNegatives(IntEnum):
    """``IntEnum`` with negative values and one alias (``too`` for ``two``)."""

    # Used for testing of hash, need to make sure -1 -> -2 to comply with CPy
    one = 1
    two = 2
    too = 2
    three = 3
    negone = -1
    negtwo = -2
    negthree = -3
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda import types
|
|
5
|
+
from numba.cuda.core import config
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MyStruct:
    """Simple Python-side container with two attributes, ``x`` and ``y``."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class StructModelType(types.Type):
    """Type object registered under the name ``"TestStructModelType"``."""

    def __init__(self):
        super().__init__(name="TestStructModelType")


# Single shared instance used by the typing and lowering hooks in this module.
struct_model_type = StructModelType()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# The typing/lowering registrations are skipped when ENABLE_CUDASIM is set.
if not config.ENABLE_CUDASIM:
    from numba.cuda import int32
    from numba.cuda.extending import (
        core_models,
        typeof_impl,
        type_callable,
    )
    from numba.cuda.extending import (
        register_model,
        make_attribute_wrapper,
    )
    from numba.cuda.cudaimpl import lower
    from numba.cuda import cgutils

    @typeof_impl.register(MyStruct)
    def typeof_teststruct(val, c):
        """Map any ``MyStruct`` value to ``struct_model_type``."""
        return struct_model_type

    @register_model(StructModelType)
    class TestStructModel(core_models.StructModel):
        """Data model with two ``int32`` members, ``x`` and ``y``."""

        def __init__(self, dmm, fe_type):
            members = [("x", int32), ("y", int32)]
            super().__init__(dmm, fe_type, members)

    # Expose both members as attributes under the same names.
    make_attribute_wrapper(StructModelType, "x", "x")
    make_attribute_wrapper(StructModelType, "y", "y")

    @type_callable(MyStruct)
    def type_test_struct(context):
        """Type ``MyStruct(x, y)`` calls when both arguments are integers."""

        def typer(x, y):
            # Returns None (no match) for non-integer argument types.
            if isinstance(x, types.Integer) and isinstance(y, types.Integer):
                return struct_model_type

        return typer

    @lower(MyStruct, types.Integer, types.Integer)
    def lower_test_type_ctor(context, builder, sig, args):
        """Build the struct value from the two lowered constructor arguments."""
        obj = cgutils.create_struct_proxy(struct_model_type)(context, builder)
        obj.x = args[0]
        obj.y = args[1]
        return obj._getvalue()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Generated by NVIDIA NVVM Compiler
|
|
3
|
+
// Compiler built on Tue Apr 1 03:34:02 2014 (1396341242)
|
|
4
|
+
// Cuda compilation tools, release 6.0, V6.0.1
|
|
5
|
+
//
|
|
6
|
+
|
|
7
|
+
.version 4.0
|
|
8
|
+
.target sm_20
|
|
9
|
+
.address_size 64
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
// bar(addr, value): stores (value << 1) as a 32-bit word at addr and
// returns 0.
.visible .func (.param .b32 func_retval0) bar(
    .param .b64 bar_param_0,
    .param .b32 bar_param_1
)
{
    .reg .s32 %r<4>;
    .reg .s64 %rd<2>;


    // Load the destination address and the 32-bit input value.
    ld.param.u64 %rd1, [bar_param_0];
    ld.param.u32 %r1, [bar_param_1];
    // Shift the input left by one bit and store it at the address.
    shl.b32 %r2, %r1, 1;
    st.u32 [%rd1], %r2;
    // Return value is always 0.
    mov.u32 %r3, 0;
    st.param.b32 [func_retval0+0], %r3;
    ret;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
# Python 3 syntax only use cases, used in test_extending.py
|
|
5
|
+
|
|
6
|
+
# arg name is different, and there's no arg name to match before *args
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def impl4(z, *args, kw=None):
    """Return 1 if ``z > 10`` else -1; ``args`` and ``kw`` are ignored."""
    if z > 10:
        return 1
    else:
        return -1
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# arg name is different but at a detectable location, with *args
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def impl5(z, b, *args, kw=None):
    """Return 1 if ``z > 10`` else -1; ``b``, ``args`` and ``kw`` are ignored."""
    if z > 10:
        return 1
    else:
        return -1
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def var_positional_impl(a, *star_args_token, kw=None, kw1=12):
    """Return an ``impl`` that takes fixed positionals ``(a, b, f)``.

    The outer arguments are not used; the returned function yields 1 when
    its own ``a > 10`` and -1 otherwise.
    """

    def impl(a, b, f, kw=None, kw1=12):
        if a > 10:
            return 1
        else:
            return -1

    return impl
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Usecases of recursive functions in the CUDA target, many derived from
|
|
6
|
+
numba/tests/recursion_usecases.py.
|
|
7
|
+
|
|
8
|
+
Some functions are compiled at import time, hence a separate module.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from numba import cuda
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@cuda.jit("i8(i8)", device=True)
def fib1(n):
    """Self-recursive Fibonacci device function with an explicit signature."""
    if n < 2:
        return n
    # Note the second call does not use a named argument, unlike the CPU target
    # usecase
    return fib1(n - 1) + fib1(n - 2)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def make_fib2():
    """Return a recursive Fibonacci device function defined inside a closure."""

    @cuda.jit("i8(i8)", device=True)
    def fib2(n):
        if n < 2:
            return n
        return fib2(n - 1) + fib2(n - 2)

    return fib2


fib2 = make_fib2()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@cuda.jit
def type_change_self(x, y):
    """Return ``x + type_change_self(x - y, y)`` while ``x > 1 and y > 0``.

    Otherwise returns ``y``.
    """
    if x > 1 and y > 0:
        return x + type_change_self(x - y, y)
    else:
        return y
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Implicit signature
@cuda.jit(device=True)
def fib3(n):
    """Self-recursive Fibonacci device function with no declared signature."""
    if n < 2:
        return n

    return fib3(n - 1) + fib3(n - 2)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Run-away self recursion
@cuda.jit(device=True)
def runaway_self(x):
    """Call itself unconditionally; there is no base case."""
    return runaway_self(x)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@cuda.jit(device=True)
def raise_self(x):
    """Recurse down from ``x``; raise ``ValueError`` on reaching 1.

    Returns 1 without raising when ``x <= 0``.
    """
    if x == 1:
        raise ValueError("raise_self")
    elif x > 0:
        return raise_self(x - 1)
    else:
        return 1
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@cuda.jit(debug=True, opt=False)
def raise_self_kernel(x):
    """Kernel entry point that calls ``raise_self(x)``."""
    raise_self(x)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def make_optional_return_case(jit=lambda x: x):
    """Build and return ``bar``, a recursive function with an optional result.

    ``jit`` decorates both inner functions; the default leaves them as plain
    Python.
    """

    @jit
    def foo(x):
        # Returns x - 1 for x > 5, otherwise None.
        if x > 5:
            return x - 1
        else:
            return

    @jit
    def bar(x):
        out = foo(x)
        if out is None:
            return out
        elif out < 8:
            return out
        else:
            return x * bar(out)

    return bar
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def make_growing_tuple_case(jit=lambda x: x):
    """Build and return ``make_list``, which nests one tuple level per step.

    ``jit`` decorates the inner function; the default leaves it as plain
    Python.
    """

    # From issue #4387
    @jit
    def make_list(n):
        if n <= 0:
            return None

        return (n, make_list(n - 1))

    return make_list
|