numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include <cuda/atomic>
|
|
7
|
+
|
|
8
|
+
typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
|
|
9
|
+
typedef void (*NRT_dealloc_func)(void* ptr, void* dealloc_info);
|
|
10
|
+
|
|
11
|
+
extern "C"
|
|
12
|
+
struct MemInfo {
|
|
13
|
+
cuda::atomic<size_t, cuda::thread_scope_device> refct;
|
|
14
|
+
NRT_dtor_function dtor;
|
|
15
|
+
void* dtor_info;
|
|
16
|
+
void* data;
|
|
17
|
+
size_t size;
|
|
18
|
+
};
|
|
19
|
+
typedef struct MemInfo NRT_MemInfo;
|
|
20
|
+
|
|
21
|
+
// Prototypes for the NRT device functions. Only declarations appear in this
// header; every function is declared exactly once.

// Functions with C linkage that take or return raw pointers.
extern "C" __device__ void* NRT_Allocate(size_t size);
extern "C" __device__ void* NRT_Allocate_External(size_t size);
extern "C" __device__ void NRT_Free(void* ptr);
extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi);

// Functions with C linkage that create or initialise an NRT_MemInfo.
extern "C" __device__ void NRT_MemInfo_init(NRT_MemInfo* mi,
                                            void* data,
                                            size_t size,
                                            NRT_dtor_function dtor,
                                            void* dtor_info);
extern "C" __device__ NRT_MemInfo* NRT_MemInfo_new(void* data,
                                                   size_t size,
                                                   NRT_dtor_function dtor,
                                                   void* dtor_info);
extern "C" __device__ NRT_MemInfo* NRT_MemInfo_new_varsize(size_t size);
extern "C" __device__ NRT_MemInfo* NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor);
extern "C" __device__ NRT_MemInfo* NRT_MemInfo_alloc_aligned(size_t size, unsigned align);

// Functions with C linkage that operate on an existing NRT_MemInfo.
extern "C" __device__ void NRT_incref(NRT_MemInfo* mi);
extern "C" __device__ void NRT_decref(NRT_MemInfo* mi);
extern "C" __device__ void* NRT_MemInfo_data_fast(NRT_MemInfo *mi);
extern "C" __device__ void NRT_MemInfo_call_dtor(NRT_MemInfo* mi);
extern "C" __device__ void NRT_MemInfo_destroy(NRT_MemInfo* mi);

// Helpers with internal linkage (static); they write an NRT_MemInfo* through
// their last parameter.
static __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out);
static __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi);
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import ctypes
|
|
5
|
+
import os
|
|
6
|
+
from functools import wraps
|
|
7
|
+
import numpy as np
|
|
8
|
+
from collections import namedtuple
|
|
9
|
+
|
|
10
|
+
from numba import cuda
|
|
11
|
+
from numba.cuda import config, types
|
|
12
|
+
|
|
13
|
+
from numba.cuda.cudadrv.driver import (
|
|
14
|
+
_Linker,
|
|
15
|
+
driver,
|
|
16
|
+
launch_kernel,
|
|
17
|
+
_have_nvjitlink,
|
|
18
|
+
)
|
|
19
|
+
from numba.cuda.cudadrv import devices
|
|
20
|
+
from numba.cuda.api import get_current_device
|
|
21
|
+
from numba.cuda.utils import _readenv, cached_file_read
|
|
22
|
+
from numba.cuda.cudadrv.linkable_code import CUSource
|
|
23
|
+
from numba.cuda.typing.templates import signature
|
|
24
|
+
|
|
25
|
+
from numba.cuda.extending import intrinsic, overload_classmethod
|
|
26
|
+
|
|
27
|
+
_nrt_mstats = namedtuple("nrt_mstats", ["alloc", "free", "mi_alloc", "mi_free"])
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# NRT statistics are enabled when the NUMBA_CUDA_NRT_STATS environment
# variable reads as truthy; otherwise fall back to a config attribute of the
# same name (False when that attribute is absent).
NRT_STATS = _readenv("NUMBA_CUDA_NRT_STATS", bool, False)
if not NRT_STATS:
    NRT_STATS = getattr(config, "NUMBA_CUDA_NRT_STATS", False)
# NOTE(review): the presence check uses the "NUMBA_"-prefixed attribute name
# while the value is stored as config.CUDA_NRT_STATS -- confirm the mismatch
# is intentional.
if not hasattr(config, "NUMBA_CUDA_NRT_STATS"):
    config.CUDA_NRT_STATS = NRT_STATS
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# NRT is enabled when the NUMBA_CUDA_ENABLE_NRT environment variable reads as
# truthy; otherwise fall back to a config attribute of the same name (False
# when that attribute is absent).
ENABLE_NRT = _readenv("NUMBA_CUDA_ENABLE_NRT", bool, False)
if not ENABLE_NRT:
    ENABLE_NRT = getattr(config, "NUMBA_CUDA_ENABLE_NRT", False)
# NOTE(review): the presence check uses the "NUMBA_"-prefixed attribute name
# while the value is stored as config.CUDA_ENABLE_NRT -- confirm the mismatch
# is intentional.
if not hasattr(config, "NUMBA_CUDA_ENABLE_NRT"):
    config.CUDA_ENABLE_NRT = ENABLE_NRT
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_include():
    """Return the directory containing this module.

    This is the include path to use for the NRT header.
    """
    this_file = os.path.abspath(__file__)
    return os.path.dirname(this_file)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Provide an implementation of Array._allocate() for the CUDA target (used
|
|
52
|
+
# internally by Numba when generating the allocation of an array)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@intrinsic
def intrin_alloc(typingctx, allocsize, align):
    """Intrinsic that calls into the NRT allocator on behalf of Array."""

    # The result is typed as a MemInfo pointer wrapping an untyped pointer.
    result_type = types.MemInfoPointer(types.voidptr)
    sig = signature(result_type, allocsize, align)

    def codegen(context, builder, signature, args):
        # Emit the aligned MemInfo allocation and hand back its value.
        size_val, align_val = args
        return context.nrt.meminfo_alloc_aligned(builder, size_val, align_val)

    return sig, codegen
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@overload_classmethod(types.Array, "_allocate", target="CUDA")
def _ol_array_allocate(cls, allocsize, align):
    """Overload ``Array._allocate`` as a classmethod for the CUDA target.

    The returned implementation forwards the requested size and alignment to
    ``intrin_alloc``.
    """

    def _allocate_impl(cls, allocsize, align):
        return intrin_alloc(allocsize, align)

    return _allocate_impl
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Protect method to ensure NRT memory allocation and initialization
|
|
80
|
+
def _alloc_init_guard(method):
|
|
81
|
+
"""
|
|
82
|
+
Ensure NRT memory allocation and initialization before running the method
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
@wraps(method)
|
|
86
|
+
def wrapper(self, *args, **kwargs):
|
|
87
|
+
self.ensure_allocated()
|
|
88
|
+
self.ensure_initialized()
|
|
89
|
+
return method(self, *args, **kwargs)
|
|
90
|
+
|
|
91
|
+
return wrapper
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class _Runtime:
    """Singleton class for the Numba CUDA runtime (NRT memsys management).

    The instance compiles ``memsys.cu`` into a module on demand, allocates a
    device buffer for the memsys, and launches single-thread kernels from
    that module to initialize the memsys, toggle statistics collection and
    read the statistics back.

    Note that ``__init__`` runs every time ``_Runtime()`` is called, so
    constructing the class again resets the shared instance to the
    uninitialized state.
    """

    # The one shared instance, created lazily by ``__new__``
    _instance = None

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use"""
        if cls._instance is None:
            cls._instance = super().__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        """Initialize memsys module and variable"""
        self._reset()

    def _reset(self):
        """Reset to the uninitialized state"""
        self._memsys_module = None
        self._memsys = None
        self._initialized = False

    def close(self):
        """Close and reset"""
        self._reset()

    def _compile_memsys_module(self):
        """
        Compile memsys.cu and create a module from it in the current context
        """
        # memsys.cu is looked up next to this file
        memsys_mod = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "memsys.cu"
        )
        cc = get_current_device().compute_capability

        # Create a new linker instance and add the cu file
        linker = _Linker.new(cc=cc, lto=_have_nvjitlink())
        linker.add_cu_file(memsys_mod)

        # Complete the link and create a module from the result
        cubin = linker.complete()
        ctx = devices.get_context()
        module = ctx.create_module_image(cubin)

        # Remember the module for later kernel launches
        self._memsys_module = module

    def ensure_allocated(self, stream=None):
        """
        If memsys is not allocated, allocate it; otherwise, perform a no-op
        """
        if self._memsys is not None:
            return

        self.allocate(stream)

    def allocate(self, stream=None):
        """
        Allocate memsys on the device and register it with the memsys module.

        The memsys module is compiled first if it does not exist yet.
        """
        from numba.cuda import device_array

        if self._memsys_module is None:
            self._compile_memsys_module()

        # Read the value of the module's ``memsys_size`` global symbol back
        # to the host; it gives the number of bytes to allocate.
        memsys_size = ctypes.c_uint64()
        ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
        device_memsys_size = ptr.device_ctypes_pointer
        device_memsys_size = device_memsys_size.value
        driver.cuMemcpyDtoH(
            ctypes.addressof(memsys_size), device_memsys_size, nbytes
        )

        # Allocate a byte buffer of that size and point the module at it
        self._memsys = device_array(
            (memsys_size.value,), dtype="i1", stream=stream
        )
        self.set_memsys_to_module(self._memsys_module, stream=stream)

    def _single_thread_launch(self, module, stream, name, params=()):
        """
        Launch the kernel called ``name`` from ``module`` with only 1 thread.

        When ``stream`` is None the default stream is used. ``params`` is the
        sequence of kernel arguments.
        """
        if stream is None:
            stream = cuda.default_stream()

        func = module.get_function(name)
        # NOTE(review): the six 1s are presumably the grid and block
        # dimensions and the 0 the dynamic shared memory size - confirm
        # against ``launch_kernel``.
        launch_kernel(
            func.handle,
            1,
            1,
            1,
            1,
            1,
            1,
            0,
            stream.handle.value,
            params,
            cooperative=False,
        )

    def ensure_initialized(self, stream=None):
        """
        If memsys is not initialized, initialize memsys
        """
        if self._initialized:
            return

        self.initialize(stream)

    def initialize(self, stream=None):
        """
        Launch the memsys initialization kernel, allocating memsys first if
        needed, and enable statistics when ``config.CUDA_NRT_STATS`` is set.
        """
        # Forward the caller's stream so that allocation, initialization and
        # the optional stats kernel are all issued on the same stream.
        self.ensure_allocated(stream)

        self._single_thread_launch(
            self._memsys_module, stream, "NRT_MemSys_init"
        )
        self._initialized = True

        if config.CUDA_NRT_STATS:
            self.memsys_enable_stats(stream)

    @_alloc_init_guard
    def memsys_enable_stats(self, stream=None):
        """
        Enable memsys statistics
        """
        self._single_thread_launch(
            self._memsys_module, stream, "NRT_MemSys_enable_stats"
        )

    @_alloc_init_guard
    def memsys_disable_stats(self, stream=None):
        """
        Disable memsys statistics
        """
        self._single_thread_launch(
            self._memsys_module, stream, "NRT_MemSys_disable_stats"
        )

    @_alloc_init_guard
    def memsys_stats_enabled(self, stream=None):
        """
        Return a boolean indicating whether memsys statistics are enabled.
        Synchronizes the context.
        """
        # One-element managed array that the kernel writes its answer into
        enabled_ar = cuda.managed_array(1, np.uint8)
        enabled_ptr = enabled_ar.device_ctypes_pointer

        self._single_thread_launch(
            self._memsys_module,
            stream,
            "NRT_MemSys_stats_enabled",
            (enabled_ptr,),
        )

        # Wait for the kernel before reading the result on the host
        cuda.synchronize()
        return bool(enabled_ar[0])

    @_alloc_init_guard
    def _copy_memsys_to_host(self, stream=None):
        """
        Copy all statistics of memsys to the host and return them as a
        single record with ``alloc``, ``free``, ``mi_alloc`` and ``mi_free``
        fields. Synchronizes the context.
        """
        dt = np.dtype(
            [
                ("alloc", np.uint64),
                ("free", np.uint64),
                ("mi_alloc", np.uint64),
                ("mi_free", np.uint64),
            ]
        )

        stats_for_read = cuda.managed_array(1, dt)
        stats_ptr = stats_for_read.device_ctypes_pointer

        self._single_thread_launch(
            self._memsys_module, stream, "NRT_MemSys_read", [stats_ptr]
        )
        cuda.synchronize()

        return stats_for_read[0]

    @_alloc_init_guard
    def get_allocation_stats(self, stream=None):
        """
        Get the allocation statistics as an ``nrt_mstats`` named tuple.

        Raises RuntimeError if NRT statistics are disabled.
        """
        enabled = self.memsys_stats_enabled(stream)
        if not enabled:
            raise RuntimeError("NRT stats are disabled.")
        memsys = self._copy_memsys_to_host(stream)
        return _nrt_mstats(
            alloc=memsys["alloc"],
            free=memsys["free"],
            mi_alloc=memsys["mi_alloc"],
            mi_free=memsys["mi_free"],
        )

    @_alloc_init_guard
    def _get_single_stat(self, stat, stream=None):
        """
        Get a single stat from the memsys by launching the
        ``NRT_MemSys_read_<stat>`` kernel. Synchronizes the context.
        """
        got = cuda.managed_array(1, np.uint64)
        got_ptr = got.device_ctypes_pointer

        self._single_thread_launch(
            self._memsys_module, stream, f"NRT_MemSys_read_{stat}", [got_ptr]
        )

        cuda.synchronize()
        return got[0]

    def _get_single_stat_checked(self, stat, stream=None):
        """
        Return one stat, raising RuntimeError if NRT statistics are disabled.

        Both the enabled check and the read use the caller's stream.
        """
        if not self.memsys_stats_enabled(stream):
            raise RuntimeError("NRT stats are disabled.")

        return self._get_single_stat(stat, stream)

    @_alloc_init_guard
    def memsys_get_stats_alloc(self, stream=None):
        """
        Get the allocation statistic

        Raises RuntimeError if NRT statistics are disabled.
        """
        return self._get_single_stat_checked("alloc", stream)

    @_alloc_init_guard
    def memsys_get_stats_free(self, stream=None):
        """
        Get the free statistic

        Raises RuntimeError if NRT statistics are disabled.
        """
        return self._get_single_stat_checked("free", stream)

    @_alloc_init_guard
    def memsys_get_stats_mi_alloc(self, stream=None):
        """
        Get the mi alloc statistic

        Raises RuntimeError if NRT statistics are disabled.
        """
        return self._get_single_stat_checked("mi_alloc", stream)

    @_alloc_init_guard
    def memsys_get_stats_mi_free(self, stream=None):
        """
        Get the mi free statistic

        Raises RuntimeError if NRT statistics are disabled.
        """
        return self._get_single_stat_checked("mi_free", stream)

    def set_memsys_to_module(self, module, stream=None):
        """
        Set the memsys module. The module must contain `NRT_MemSys_set` kernel,
        and declare a pointer to NRT_MemSys structure.

        Raises RuntimeError if memsys has not been allocated yet.
        """
        if self._memsys is None:
            raise RuntimeError(
                "Please allocate NRT Memsys first before setting to module."
            )

        memsys_ptr = self._memsys.device_ctypes_pointer

        self._single_thread_launch(
            module, stream, "NRT_MemSys_set", [memsys_ptr]
        )

    @_alloc_init_guard
    def print_memsys(self, stream=None):
        """
        Print the current statistics of memsys, for debugging purposes
        """
        # Synchronize before launching the print kernel
        cuda.synchronize()
        self._single_thread_launch(
            self._memsys_module, stream, "NRT_MemSys_print"
        )
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
# Module-level runtime singleton
rtsys = _Runtime()


# Read nrt.cu from the directory holding this file and wrap it as a CUSource
basedir = os.path.dirname(os.path.abspath(__file__))
nrt_path = os.path.join(basedir, "nrt.cu")
nrt_src = cached_file_read(nrt_path)
NRT_LIBRARY = CUSource(
    nrt_src,
    name="nrt.cu",
    nrt=True,
)
|