numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from numba.cuda.cudadrv.driver import device_memset, driver
|
|
6
|
+
from numba import cuda
|
|
7
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
|
8
|
+
from numba.cuda.testing import skip_on_cudasim, skip_on_arm
|
|
9
|
+
from numba.cuda.tests.support import linux_only
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
|
|
13
|
+
@linux_only
|
|
14
|
+
@skip_on_arm("Managed Alloc support is experimental/untested on ARM")
|
|
15
|
+
class TestManagedAlloc(CUDATestCase):
|
|
16
|
+
def tearDown(self):
|
|
17
|
+
super().tearDown()
|
|
18
|
+
cuda.current_context().reset()
|
|
19
|
+
|
|
20
|
+
def get_total_gpu_memory(self):
|
|
21
|
+
# We use a driver function to directly get the total GPU memory because
|
|
22
|
+
# an EMM plugin may report something different (or not implement
|
|
23
|
+
# get_memory_info at all).
|
|
24
|
+
free, total = driver.cuMemGetInfo()
|
|
25
|
+
return total
|
|
26
|
+
|
|
27
|
+
def skip_if_cc_major_lt(self, min_required, reason):
|
|
28
|
+
"""
|
|
29
|
+
Skip the current test if the compute capability of the device is
|
|
30
|
+
less than `min_required`.
|
|
31
|
+
"""
|
|
32
|
+
ctx = cuda.current_context()
|
|
33
|
+
cc_major = ctx.device.compute_capability[0]
|
|
34
|
+
if cc_major < min_required:
|
|
35
|
+
self.skipTest(reason)
|
|
36
|
+
|
|
37
|
+
# CUDA Unified Memory comes in two flavors. For GPUs in the Kepler and
|
|
38
|
+
# Maxwell generations, managed memory allocations work as opaque,
|
|
39
|
+
# contiguous segments that can either be on the device or the host. For
|
|
40
|
+
# GPUs in the Pascal or later generations, managed memory operates on a
|
|
41
|
+
# per-page basis, so we can have arrays larger than GPU memory, where only
|
|
42
|
+
# part of them is resident on the device at one time. To ensure that this
|
|
43
|
+
# test works correctly on all supported GPUs, we'll select the size of our
|
|
44
|
+
# memory such that we only oversubscribe the GPU memory if we're on a
|
|
45
|
+
# Pascal or newer GPU (compute capability at least 6.0).
|
|
46
|
+
|
|
47
|
+
def test_managed_alloc_driver_undersubscribe(self):
|
|
48
|
+
msg = "Managed memory unsupported prior to CC 3.0"
|
|
49
|
+
self.skip_if_cc_major_lt(3, msg)
|
|
50
|
+
# We keep the allocation small so that it doesn't hang on GPUs
|
|
51
|
+
# with large memory (H100)
|
|
52
|
+
self._test_managed_alloc_driver(0.1)
|
|
53
|
+
|
|
54
|
+
# This test is skipped by default because it is easy to hang the machine
|
|
55
|
+
# for a very long time or get OOM killed if the GPU memory size is >50% of
|
|
56
|
+
# the system memory size. Even if the system does have more than 2x the RAM
|
|
57
|
+
# of the GPU, this test runs for a very long time (in comparison to the
|
|
58
|
+
# rest of the tests in the suite).
|
|
59
|
+
#
|
|
60
|
+
# However, it is left in here for manual testing as required.
|
|
61
|
+
|
|
62
|
+
@unittest.skip
|
|
63
|
+
def test_managed_alloc_driver_oversubscribe(self):
|
|
64
|
+
msg = "Oversubscription of managed memory unsupported prior to CC 6.0"
|
|
65
|
+
self.skip_if_cc_major_lt(6, msg)
|
|
66
|
+
self._test_managed_alloc_driver(2.0)
|
|
67
|
+
|
|
68
|
+
def test_managed_alloc_driver_host_attach(self):
|
|
69
|
+
msg = "Host attached managed memory is not accessible prior to CC 6.0"
|
|
70
|
+
self.skip_if_cc_major_lt(6, msg)
|
|
71
|
+
# Only test with a small array (0.01 * memory size) to keep the test
|
|
72
|
+
# quick.
|
|
73
|
+
self._test_managed_alloc_driver(0.01, attach_global=False)
|
|
74
|
+
|
|
75
|
+
def _test_managed_alloc_driver(self, memory_factor, attach_global=True):
|
|
76
|
+
# Verify that we can allocate and operate on managed
|
|
77
|
+
# memory through the CUDA driver interface.
|
|
78
|
+
|
|
79
|
+
total_mem_size = self.get_total_gpu_memory()
|
|
80
|
+
n_bytes = int(memory_factor * total_mem_size)
|
|
81
|
+
|
|
82
|
+
ctx = cuda.current_context()
|
|
83
|
+
mem = ctx.memallocmanaged(n_bytes, attach_global=attach_global)
|
|
84
|
+
|
|
85
|
+
dtype = np.dtype(np.uint8)
|
|
86
|
+
n_elems = n_bytes // dtype.itemsize
|
|
87
|
+
ary = np.ndarray(shape=n_elems, dtype=dtype, buffer=mem)
|
|
88
|
+
|
|
89
|
+
magic = 0xAB
|
|
90
|
+
device_memset(mem, magic, n_bytes)
|
|
91
|
+
ctx.synchronize()
|
|
92
|
+
|
|
93
|
+
# Note that this assertion operates on the CPU, so this
|
|
94
|
+
# test effectively drives both the CPU and the GPU on
|
|
95
|
+
# managed memory.
|
|
96
|
+
|
|
97
|
+
self.assertTrue(np.all(ary == magic))
|
|
98
|
+
|
|
99
|
+
def _test_managed_array(self, attach_global=True):
    # Check the managed_array interface on both host and device.
    #
    # attach_global is forwarded to cuda.managed_array so that callers
    # asking for host attachment (attach_global=False) actually exercise
    # that mode rather than always testing the default.
    ary = cuda.managed_array(
        100, dtype=np.double, attach_global=attach_global
    )

    # Host-side write and read
    ary.fill(123.456)
    self.assertTrue(all(ary == 123.456))

    @cuda.jit("void(double[:])")
    def kernel(x):
        i = cuda.grid(1)
        if i < x.shape[0]:
            x[i] = 1.0

    # Device-side write: 10 blocks of 10 threads cover the 100 elements
    kernel[10, 10](ary)
    cuda.current_context().synchronize()

    # Host-side read of the values written by the kernel
    self.assertTrue(all(ary == 1.0))
|
|
116
|
+
|
|
117
|
+
def test_managed_array_attach_global(self):
    # Run the managed array check with attach_global at its default, True.
    self._test_managed_array(attach_global=True)
|
|
119
|
+
|
|
120
|
+
def test_managed_array_attach_host(self):
    # Decide whether to skip before doing any work; the default
    # (attach_global=True) path is already covered by
    # test_managed_array_attach_global, so it is not repeated here.
    msg = "Host attached managed memory is not accessible prior to CC 6.0"
    self.skip_if_cc_major_lt(6, msg)
    self._test_managed_array(attach_global=False)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import unittest
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from numba import cuda
|
|
10
|
+
from numba.cuda import config
|
|
11
|
+
from numba.cuda.cudadrv.linkable_code import CUSource
|
|
12
|
+
from numba.cuda.testing import (
|
|
13
|
+
CUDATestCase,
|
|
14
|
+
skip_on_cudasim,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
if not config.ENABLE_CUDASIM:
|
|
18
|
+
from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD
|
|
19
|
+
|
|
20
|
+
from cuda.bindings.driver import CUmodule as cu_module_type
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def wipe_all_modules_in_context():
    """Reset the current context, dropping the module references it holds.

    The tests use this to simulate the behavior on interpreter shutdown.

    TODO: Temporary workaround until
    https://github.com/NVIDIA/numba-cuda/issues/171 is implemented.
    """
    cuda.current_context().reset()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_hashable_handle_value(handle):
    """Return ``handle`` unchanged.

    Callers in this file add the result to sets and pass it to driver
    calls, so the handle itself is expected to be usable for both.
    """
    return handle
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestModuleCallbacksBasic(CUDATestCase):
    """Check how often CUSource setup and teardown callbacks are invoked."""

    def test_basic(self):
        # One kernel, launched twice: setup runs on the first launch only,
        # and teardown runs once the context's modules are wiped.
        counter = 0

        def setup(handle):
            nonlocal counter
            self.assertIsInstance(handle, cu_module_type)
            counter += 1

        def teardown(handle):
            nonlocal counter
            self.assertIsInstance(handle, cu_module_type)
            counter -= 1

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel():
            pass

        self.assertEqual(counter, 0)
        kernel[1, 1]()
        self.assertEqual(counter, 1)
        kernel[1, 1]()  # cached
        self.assertEqual(counter, 1)

        wipe_all_modules_in_context()
        del kernel
        self.assertEqual(counter, 0)

    def test_different_argtypes(self):
        # One kernel launched with two argument types: each type is expected
        # to produce its own module, hence its own setup/teardown pair.
        counter = 0
        setup_seen = set()
        teardown_seen = set()

        def setup(handle):
            nonlocal counter
            counter += 1
            setup_seen.add(get_hashable_handle_value(handle))

        def teardown(handle):
            nonlocal counter
            counter -= 1
            teardown_seen.add(get_hashable_handle_value(handle))

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel(arg):
            pass

        self.assertEqual(counter, 0)
        kernel[1, 1](42)  # (int64)->() : module 1
        self.assertEqual(counter, 1)
        kernel[1, 1](100)  # (int64)->() : module 1, cached
        self.assertEqual(counter, 1)
        kernel[1, 1](3.14)  # (float64)->() : module 2
        self.assertEqual(counter, 2)

        wipe_all_modules_in_context()
        del kernel
        self.assertEqual(counter, 0)

        self.assertEqual(len(setup_seen), 2)
        self.assertEqual(len(teardown_seen), 2)

    def test_two_kernels(self):
        # Two kernels linking the same CUSource: each launch is expected to
        # trigger its own setup call with a distinct handle.
        counter = 0
        setup_seen = set()
        teardown_seen = set()

        def setup(handle):
            nonlocal counter
            counter += 1
            setup_seen.add(get_hashable_handle_value(handle))

        def teardown(handle):
            nonlocal counter
            counter -= 1
            teardown_seen.add(get_hashable_handle_value(handle))

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel():
            pass

        @cuda.jit(link=[lib])
        def kernel2():
            pass

        kernel[1, 1]()
        self.assertEqual(counter, 1)
        kernel2[1, 1]()
        self.assertEqual(counter, 2)

        wipe_all_modules_in_context()
        # Release both kernels, not just the first
        del kernel, kernel2
        self.assertEqual(counter, 0)

        self.assertEqual(len(setup_seen), 2)
        self.assertEqual(len(teardown_seen), 2)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestModuleCallbacksAPICompleteness(CUDATestCase):
    """Launch a kernel for every present/absent callback combination."""

    def test_api(self):
        def setup(handle):
            pass

        def teardown(handle):
            pass

        # (setup, teardown), (setup, None), (None, teardown), (None, None)
        callback_pairs = [
            (setup_cb, teardown_cb)
            for setup_cb in (setup, None)
            for teardown_cb in (teardown, None)
        ]

        for setup_cb, teardown_cb in callback_pairs:
            with self.subTest(setup=setup_cb, teardown=teardown_cb):
                source = CUSource(
                    "",
                    setup_callback=setup_cb,
                    teardown_callback=teardown_cb,
                )

                @cuda.jit(link=[source])
                def kernel():
                    pass

                kernel[1, 1]()
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestModuleCallbacks(CUDATestCase):
    """Use a setup callback to initialize a device global before launch."""

    def setUp(self):
        super().setUp()

        module = """
__device__ int num = 0;
extern "C"
__device__ int get_num(int &retval) {
    retval = num;
    return 0;
}
"""

        def set_forty_two(handle):
            # Write 42 into the device global `num` of the loaded module.
            # Imported here because the driver bindings are only imported
            # by this file when the simulator is not enabled.
            from cuda.bindings.driver import CUresult

            res, dptr, size = cuModuleGetGlobal(
                get_hashable_handle_value(handle), "num".encode()
            )
            # Fail loudly here rather than with a wrong value later on
            if res != CUresult.CUDA_SUCCESS:
                raise RuntimeError(f"cuModuleGetGlobal failed: {res}")

            arr = np.array([42], np.int32)
            (res,) = cuMemcpyHtoD(dptr, arr.ctypes.data, size)
            if res != CUresult.CUDA_SUCCESS:
                raise RuntimeError(f"cuMemcpyHtoD failed: {res}")

        self.lib = CUSource(
            module, setup_callback=set_forty_two, teardown_callback=None
        )

    def test_decldevice_arg(self):
        # The CUSource is attached through declare_device's link argument
        get_num = cuda.declare_device("get_num", "int32()", link=[self.lib])

        @cuda.jit
        def kernel(arr):
            arr[0] = get_num()

        arr = np.zeros(1, np.int32)
        kernel[1, 1](arr)
        self.assertEqual(arr[0], 42)

    def test_jitarg(self):
        # The CUSource is attached through cuda.jit's link argument
        get_num = cuda.declare_device("get_num", "int32()")

        @cuda.jit(link=[self.lib])
        def kernel(arr):
            arr[0] = get_num()

        arr = np.zeros(1, np.int32)
        kernel[1, 1](arr)
        self.assertEqual(arr[0], 42)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestMultithreadedCallbacks(CUDATestCase):
    """Check callback bookkeeping when kernels are launched from threads."""

    def test_concurrent_initialization(self):
        # Four threads launch the same kernel; the set of live modules seen
        # by the callbacks must never exceed one and must end up empty.
        seen_mods = set()
        max_seen_mods = 0

        def setup(mod):
            nonlocal max_seen_mods
            seen_mods.add(get_hashable_handle_value(mod))
            max_seen_mods = max(max_seen_mods, len(seen_mods))

        def teardown(mod):
            # Raises an error if the module is not found in seen_mods
            seen_mods.remove(get_hashable_handle_value(mod))

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel():
            pass

        def concurrent_compilation_launch(kernel):
            kernel[1, 1]()

        threads = [
            threading.Thread(
                target=concurrent_compilation_launch, args=(kernel,)
            )
            for _ in range(4)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        wipe_all_modules_in_context()
        self.assertEqual(len(seen_mods), 0)
        self.assertEqual(max_seen_mods, 1)  # one module shared across threads

    def test_concurrent_initialization_different_args(self):
        # Four threads launch the kernel with int and float arguments; two
        # modules are expected in total, shared by all threads.
        seen_mods = set()
        max_seen_mods = 0

        def setup(mod):
            nonlocal max_seen_mods
            seen_mods.add(get_hashable_handle_value(mod))
            max_seen_mods = max(max_seen_mods, len(seen_mods))

        def teardown(mod):
            # Raises an error if the module is not found in seen_mods
            seen_mods.remove(get_hashable_handle_value(mod))

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel(a):
            pass

        def concurrent_compilation_launch():
            kernel[1, 1](42)  # (int64)->() : module 1
            kernel[1, 1](9)  # (int64)->() : module 1 from cache
            kernel[1, 1](3.14)  # (float64)->() : module 2

        threads = [
            threading.Thread(target=concurrent_compilation_launch)
            for _ in range(4)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        wipe_all_modules_in_context()
        # unittest assertion rather than a bare assert, which is stripped
        # when Python runs with -O
        self.assertEqual(len(seen_mods), 0)
        self.assertEqual(max_seen_mods, 2)  # two modules shared across threads
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
from numba.cuda.testing import unittest
|
|
6
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
7
|
+
from numba.cuda.testing import CUDATestCase
|
|
8
|
+
from numba.cuda import get_current_device
|
|
9
|
+
from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink
|
|
10
|
+
|
|
11
|
+
from numba import cuda
|
|
12
|
+
from numba.cuda import config
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import io
|
|
16
|
+
import contextlib
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Directory holding the prebuilt test binaries; unset when they are absent.
TEST_BIN_DIR = os.environ.get("NUMBA_CUDA_TEST_BIN_DIR")

# The per-format paths are only defined when the directory is configured.
if TEST_BIN_DIR:
    _stem = os.path.join(TEST_BIN_DIR, "test_device_functions")

    test_device_functions_a = _stem + ".a"
    test_device_functions_cubin = _stem + ".cubin"
    test_device_functions_cu = _stem + ".cu"
    test_device_functions_fatbin = _stem + ".fatbin"
    test_device_functions_fatbin_multi = _stem + "_multi.fatbin"
    test_device_functions_o = _stem + ".o"
    test_device_functions_ptx = _stem + ".ptx"
    test_device_functions_ltoir = _stem + ".ltoir"

    # Do not leave the temporary behind as a module attribute
    del _stem
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@unittest.skipIf(
    not TEST_BIN_DIR or not _have_nvjitlink(),
    "nvJitLink not installed or new enough (>12.3)",
)
@skip_on_cudasim("Linking unsupported in the simulator")
class TestLinker(CUDATestCase):
    """Link the prebuilt test device functions in each supported format."""

    def _enable_dump_assembly(self):
        # Turn on config.DUMP_ASSEMBLY for the current test. The previous
        # value is restored by a cleanup, so it is put back even when the
        # test aborts part-way through.
        self.addCleanup(
            setattr, config, "DUMP_ASSEMBLY", config.DUMP_ASSEMBLY
        )
        config.DUMP_ASSEMBLY = True

    def test_nvjitlink_add_file_guess_ext_linkable_code(self):
        files = (
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_cu,
            test_device_functions_fatbin,
            test_device_functions_o,
            test_device_functions_ptx,
        )
        for file in files:
            with self.subTest(file=file):
                linker = _Linker(cc=get_current_device().compute_capability)
                linker.add_file_guess_ext(file)

    def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
        with open(test_device_functions_cubin, "rb") as f:
            content = f.read()

        linker = _Linker(cc=get_current_device().compute_capability)
        with self.assertRaisesRegex(
            TypeError, "Expected path to file or a LinkableCode"
        ):
            # Feeding raw data as bytes to add_file_guess_ext should raise,
            # because there's no way to know what kind of file to treat it as
            linker.add_file_guess_ext(content)

    def test_nvjitlink_jit_with_linkable_code(self):
        files = (
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_cu,
            test_device_functions_fatbin,
            test_device_functions_o,
            test_device_functions_ptx,
        )
        for lto in [True, False]:
            for file in files:
                # Include lto so the two passes over each file can be told
                # apart in subtest failure reports
                with self.subTest(lto=lto, file=file):
                    sig = "uint32(uint32, uint32)"
                    add_from_numba = cuda.declare_device("add_from_numba", sig)

                    @cuda.jit(link=[file], lto=lto)
                    def kernel(result):
                        result[0] = add_from_numba(1, 2)

                    result = cuda.device_array(1)
                    kernel[1, 1](result)
                    self.assertEqual(result[0], 3)

    def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
        files = [
            test_device_functions_cu,
            test_device_functions_ltoir,
            test_device_functions_fatbin_multi,
        ]

        self._enable_dump_assembly()

        for file in files:
            with self.subTest(file=file):
                f = io.StringIO()
                # Capture what is printed while compiling and launching
                with contextlib.redirect_stdout(f):
                    sig = "uint32(uint32, uint32)"
                    add_from_numba = cuda.declare_device("add_from_numba", sig)

                    @cuda.jit(link=[file], lto=True)
                    def kernel(result):
                        result[0] = add_from_numba(1, 2)

                    result = cuda.device_array(1)
                    kernel[1, 1](result)
                    self.assertEqual(result[0], 3)

                self.assertIn("ASSEMBLY (AFTER LTO)", f.getvalue())

    def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
        files = [
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_fatbin,
            test_device_functions_o,
            test_device_functions_ptx,
        ]

        self._enable_dump_assembly()

        for file in files:
            with self.subTest(file=file):
                sig = "uint32(uint32, uint32)"
                add_from_numba = cuda.declare_device("add_from_numba", sig)

                @cuda.jit(link=[file], lto=True)
                def kernel(result):
                    result[0] = add_from_numba(1, 2)

                result = cuda.device_array(1)
                func = kernel[1, 1]
                with pytest.warns(
                    UserWarning,
                    match="it is not optimizable at link time, and `ignore_nonlto == True`",
                ):
                    func(result)
                self.assertEqual(result[0], 3)

    def test_nvjitlink_jit_with_invalid_linkable_code(self):
        with open(test_device_functions_cubin, "rb") as f:
            content = f.read()
        with self.assertRaisesRegex(
            TypeError, "Expected path to file or a LinkableCode"
        ):

            @cuda.jit("void()", link=[content])
            def kernel():
                pass
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.cudadrv import nvrtc
|
|
5
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
6
|
+
|
|
7
|
+
import unittest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
class TestArchOption(unittest.TestCase):
    """Checks of nvrtc.get_arch_option for several compute capabilities."""

    def test_get_arch_option(self):
        # Test returning the nearest lowest arch.
        nearest_lowest = [
            ((7, 5), "compute_75"),
            ((7, 7), "compute_75"),
            ((8, 5), "compute_80"),
            ((9, 1), "compute_90"),
        ]
        for (major, minor), expected in nearest_lowest:
            self.assertEqual(nvrtc.get_arch_option(major, minor), expected)

        # Test known arch.
        known_ccs = nvrtc.get_supported_ccs()
        for cc in known_ccs:
            expected = "compute_%d%d" % cc
            self.assertEqual(nvrtc.get_arch_option(*cc), expected)

        # A CC far above every supported one yields the last supported arch
        highest = "compute_%d%d" % known_ccs[-1]
        self.assertEqual(nvrtc.get_arch_option(1000, 0), highest)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|