numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Serialization support for compiled functions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
import abc
|
|
10
|
+
import io
|
|
11
|
+
import copyreg
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
import pickle
|
|
15
|
+
from numba.cuda import cloudpickle
|
|
16
|
+
from llvmlite import ir
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
#
|
|
20
|
+
# Pickle support
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _rebuild_reduction(cls, *args):
    """Reconstruct an object by delegating to ``cls._rebuild``.

    Module-level hook so that a ``__reduce__`` implementation can return a
    plain function plus ``(cls, *args)``; the positional ``args`` are
    forwarded unchanged to the class's ``_rebuild``.
    """
    rebuild = cls._rebuild
    return rebuild(*args)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Objects unpickled through `numba_unpickle` are stored here so that they
# stay alive; entries are keyed by ``(address, hashed)``.
_unpickled_memo = {}


def _numba_unpickle(address, bytedata, hashed):
    """Used by `numba_unpickle` from _helperlib.c

    The result is memoized on ``(address, hashed)``: ``bytedata`` is only
    deserialized the first time a given key is seen, and the same object is
    returned on every later call with that key.

    Parameters
    ----------
    address : int
    bytedata : bytes
    hashed : bytes

    Returns
    -------
    obj : object
        unpickled object
    """
    memo_key = (address, hashed)
    if memo_key not in _unpickled_memo:
        # First request for this key: deserialize and remember the result.
        _unpickled_memo[memo_key] = cloudpickle.loads(bytedata)
    return _unpickled_memo[memo_key]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def dumps(obj):
    """Serialize *obj* to bytes, in the spirit of `pickle.dumps()`.

    The object is written with a `NumbaPickler` using pickle protocol 4 into
    an in-memory buffer, and the buffer's contents are returned.
    """
    with io.BytesIO() as stream:
        NumbaPickler(stream, protocol=4).dump(obj)
        # Read the bytes out before the buffer is closed by the `with`.
        return stream.getvalue()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def runtime_build_excinfo_struct(static_exc, exc_args):
    """Combine pickled exception info with arguments supplied at runtime.

    ``static_exc`` is deserialized with ``cloudpickle.loads`` into a
    ``(exc, static_args, locinfo)`` triple. Every entry of ``static_args``
    that is an ``llvmlite.ir.Value`` is replaced, in order, by the next item
    taken from ``exc_args``; all other entries are kept as they are.

    Returns
    -------
    tuple
        ``(exc, args, locinfo)`` where ``args`` is the merged argument tuple.
    """
    exc, static_args, locinfo = cloudpickle.loads(static_exc)
    runtime_values = iter(exc_args)
    # A list comprehension (not a generator) so that an exhausted
    # `runtime_values` surfaces exactly as it would from a plain loop.
    merged_args = [
        next(runtime_values) if isinstance(arg, ir.Value) else arg
        for arg in static_args
    ]
    return (exc, tuple(merged_args), locinfo)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Alias to cloudpickle.loads to allow `serialize.loads()`
|
|
81
|
+
loads = cloudpickle.loads
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class _CustomPickled:
    """Holder for a constructor and its states that needs `NumbaPickler`.

    When the standard `pickle` machinery meets an instance, it uses the
    reducer registered through `copyreg`, which spawns a `NumbaPickler` to
    serialize the payload.

    `NumbaPickler` itself overrides the handling of this type, so no extra
    pickler is spawned when the object is already being pickled by a
    `NumbaPickler`.
    """

    __slots__ = ("ctor", "states")

    def __init__(self, ctor, states):
        self.ctor = ctor
        self.states = states

    def _reduce(self):
        """Return a ``(callable, args)`` pair that recreates this wrapper."""
        rebuild_args = (self.ctor, self.states)
        return _CustomPickled._rebuild, rebuild_args

    @classmethod
    def _rebuild(cls, ctor, states):
        """Create a new wrapper from the pair produced by `_reduce`."""
        return cls(ctor, states)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _unpickle__CustomPickled(serialized):
    """Standard unpickling entry point for `_CustomPickled`.

    ``serialized`` is decoded with the module-level `loads` into a
    ``(ctor, states)`` pair, which is wrapped in a new `_CustomPickled`.
    """
    constructor, saved_states = loads(serialized)
    return _CustomPickled(constructor, saved_states)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _pickle__CustomPickled(cp):
    """Standard pickling entry point for `_CustomPickled`.

    The wrapper's ``(ctor, states)`` pair is serialized with `dumps` (and
    therefore with `NumbaPickler`); the resulting bytes become the single
    argument of `_unpickle__CustomPickled` in the returned reduce tuple.
    """
    payload = dumps((cp.ctor, cp.states))
    return _unpickle__CustomPickled, (payload,)


# Make the standard pickler use the reducer above for `_CustomPickled`.
copyreg.pickle(_CustomPickled, _pickle__CustomPickled)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def custom_reduce(cls, states):
    """Build the return value of a customized `__reduce__`.

    The states given here are later passed as keyword arguments to the
    `._rebuild()` class method of *cls*.

    Parameters
    ----------
    cls : type
        Object providing the `._rebuild()` class method used on load.
    states : dict
        Dictionary of object states to be serialized.

    Returns
    -------
    result : tuple
        A ``(callable, args)`` tuple as required from `__reduce__`.
    """
    wrapped = _CustomPickled(cls, states)
    return custom_rebuild, (wrapped,)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def custom_rebuild(custom_pickled):
    """Recreate an object from the wrapper produced by `custom_reduce()`.

    The wrapper's ``states`` are expanded as keyword arguments to the
    ``_rebuild`` method of its ``ctor``.
    """
    ctor = custom_pickled.ctor
    kwargs = custom_pickled.states
    return ctor._rebuild(**kwargs)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def is_serialiable(obj):
    """Report whether *obj* can be dumped by a `NumbaPickler`.

    Parameters
    ----------
    obj : object

    Returns
    -------
    can_serialize : bool
        False when dumping raises `pickle.PicklingError`, True otherwise.
    """
    with io.BytesIO() as sink:
        pickler = NumbaPickler(sink)
        try:
            pickler.dump(obj)
        except pickle.PicklingError:
            outcome = False
        else:
            outcome = True
    return outcome
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _no_pickle(obj):
|
|
181
|
+
raise pickle.PicklingError(f"Pickling of {type(obj)} is unsupported")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def disable_pickling(typ):
    """Disable pickling for *typ*.

    The type is added to `NumbaPickler.disabled_types` and then returned
    unchanged, which lets this function be applied as a class decorator.
    """
    NumbaPickler.disabled_types.add(typ)
    return typ
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class NumbaPickler(cloudpickle.CloudPickler):
    """`CloudPickler` subclass that refuses to pickle disabled types."""

    # The set of types for which pickling is disabled.
    disabled_types = set()

    def reducer_override(self, obj):
        """Reject objects whose exact type is disabled; otherwise defer.

        The check uses `type(obj)`, so subclasses of a disabled type are not
        rejected here.
        """
        disabled = type(obj) in self.disabled_types
        if disabled:
            _no_pickle(obj)  # raises pickle.PicklingError
        return super().reducer_override(obj)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _custom_reduce__custompickled(cp):
|
|
204
|
+
return cp._reduce()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# Have `NumbaPickler` reduce `_CustomPickled` instances through their own
# `_reduce()` method (via `_custom_reduce__custompickled`).
NumbaPickler.dispatch_table[_CustomPickled] = _custom_reduce__custompickled
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class ReduceMixin(abc.ABC):
    """A mixin class for objects that should be reduced by the NumbaPickler
    instead of the standard pickler.

    Subclasses supply the states to serialize (`_reduce_states`) and the
    class method that recreates an instance from them (`_rebuild`).
    `__reduce__` connects both through `custom_reduce()`.
    """

    # Subclass MUST override the below methods

    @abc.abstractmethod
    def _reduce_states(self):
        """Return the dict of states that `_rebuild` receives as keywords."""
        raise NotImplementedError

    # `abc.abstractclassmethod` is deprecated; stacking `classmethod` on top
    # of `abc.abstractmethod` is the documented replacement.
    @classmethod
    @abc.abstractmethod
    def _rebuild(cls, **kwargs):
        """Recreate an instance from the states given as keyword arguments."""
        raise NotImplementedError

    # Subclass can override the below methods

    def _reduce_class(self):
        """Return the class whose `_rebuild` is used when unpickling."""
        return self.__class__

    # Private methods

    def __reduce__(self):
        return custom_reduce(self._reduce_class(), self._reduce_states())
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class PickleCallableByPath:
    """Wrap a callable object to be pickled by path to workaround limitation
    in pickling due to non-pickleable objects in function non-locals.

    Only the wrapped callable's ``__module__`` and ``__name__`` are pickled;
    on load the callable is looked up again by those two names.

    Note:
    - Do not use this as a decorator.
    - Wrapped object must be a global that exist in its parent module and it
      can be imported by `from the_module import the_object`.

    Usage:

    >>> def my_fn(x):
    >>>     ...
    >>> wrapped_fn = PickleCallableByPath(my_fn)
    >>> # refer to `wrapped_fn` instead of `my_fn`
    """

    def __init__(self, fn):
        self._fn = fn

    def __call__(self, *args, **kwargs):
        return self._fn(*args, **kwargs)

    def __reduce__(self):
        return type(self)._rebuild, (
            self._fn.__module__,
            self._fn.__name__,
        )

    @classmethod
    def _rebuild(cls, modname, fn_path):
        """Recreate the wrapper by looking up *fn_path* in module *modname*.

        The module is imported when it is not loaded yet, so unpickling also
        works in a process that has not imported it before.  An already
        loaded module is taken from `sys.modules` by `import_module`.
        """
        import importlib

        module = importlib.import_module(modname)
        return cls(getattr(module, fn_path))
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from .api import *
|
|
7
|
+
from .vector_types import vector_types
|
|
8
|
+
from .reduction import Reduce
|
|
9
|
+
from .cudadrv.devicearray import (
|
|
10
|
+
device_array,
|
|
11
|
+
device_array_like,
|
|
12
|
+
pinned,
|
|
13
|
+
pinned_array,
|
|
14
|
+
pinned_array_like,
|
|
15
|
+
mapped_array,
|
|
16
|
+
to_device,
|
|
17
|
+
auto_device,
|
|
18
|
+
)
|
|
19
|
+
from .cudadrv import devicearray
|
|
20
|
+
from .cudadrv.devices import require_context, gpus
|
|
21
|
+
from .cudadrv.devices import get_context as current_context
|
|
22
|
+
from .cudadrv.runtime import runtime
|
|
23
|
+
from .cudadrv.linkable_code import LinkableCode
|
|
24
|
+
from numba.cuda.core import config
|
|
25
|
+
|
|
26
|
+
# Expose `Reduce` under the lower-case name `reduce` as well.
reduce = Reduce

# Register simulated vector types as module level variables
for name, svty in vector_types.items():
    setattr(sys.modules[__name__], name, svty)
    # Each alias of a vector type is bound to the same object.
    for alias in svty.aliases:
        setattr(sys.modules[__name__], alias, svty)
# Remove the loop temporaries and the imported mapping from this module's
# namespace.
# NOTE(review): this raises NameError if `vector_types` is empty or no type
# has an alias -- presumably never the case; confirm.
del vector_types, name, svty, alias
|
|
34
|
+
|
|
35
|
+
# Ensure that any user code attempting to import cudadrv etc. gets the
# simulator's version and not the real version if the simulator is enabled.
# Each assignment below points an entry in `sys.modules` at the simulator's
# implementation of that module.
if config.ENABLE_CUDASIM:
    import sys
    from numba.cuda.simulator import cudadrv
    from . import dispatcher

    sys.modules["numba.cuda.cudadrv"] = cudadrv
    sys.modules["numba.cuda.cudadrv.devicearray"] = cudadrv.devicearray
    sys.modules["numba.cuda.cudadrv.devices"] = cudadrv.devices
    sys.modules["numba.cuda.cudadrv.driver"] = cudadrv.driver
    sys.modules["numba.cuda.cudadrv.linkable_code"] = cudadrv.linkable_code
    sys.modules["numba.cuda.cudadrv.runtime"] = cudadrv.runtime
    sys.modules["numba.cuda.cudadrv.drvapi"] = cudadrv.drvapi
    sys.modules["numba.cuda.cudadrv.error"] = cudadrv.error
    sys.modules["numba.cuda.cudadrv.nvvm"] = cudadrv.nvvm
    sys.modules["numba.cuda.dispatcher"] = dispatcher

    # These imports run only after the cudadrv/dispatcher entries above have
    # been replaced.
    from . import bf16, compiler, _internal

    sys.modules["numba.cuda.bf16"] = bf16
    sys.modules["numba.cuda.compiler"] = compiler
    sys.modules["numba.cuda._internal"] = _internal
    sys.modules["numba.cuda._internal.cuda_bf16"] = _internal.cuda_bf16

    from numba.cuda.simulator import memory_management

    sys.modules["numba.cuda.memory_management"] = memory_management
    sys.modules["numba.cuda.memory_management.nrt"] = memory_management.nrt
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Contains CUDA API functions
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Imports here bring together parts of the API from other modules, so some of
|
|
9
|
+
# them appear unused.
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
|
|
12
|
+
from .cudadrv.devices import require_context, reset, gpus # noqa: F401
|
|
13
|
+
from .cudadrv.linkable_code import (
|
|
14
|
+
PTXSource, # noqa: F401
|
|
15
|
+
CUSource, # noqa: F401
|
|
16
|
+
Cubin, # noqa: F401
|
|
17
|
+
Fatbin, # noqa: F401
|
|
18
|
+
Archive, # noqa: F401
|
|
19
|
+
Object, # noqa: F401
|
|
20
|
+
LTOIR, # noqa: F401
|
|
21
|
+
) # noqa: F401
|
|
22
|
+
from .kernel import FakeCUDAKernel
|
|
23
|
+
from numba.cuda.core import config
|
|
24
|
+
from numba.cuda.core.sigutils import is_signature
|
|
25
|
+
from ..args import In, Out, InOut # noqa: F401
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def select_device(dev=0):
    """Accept device 0 only; the simulator exposes a single device."""
    assert dev == 0, "Only a single device supported by the simulator"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_float16_supported():
    """Report float16 support; the simulator always answers True."""
    return True
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def is_bfloat16_supported():
    """Report bfloat16 support; the simulator always answers False."""
    return False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class stream(object):
    """
    Simulator stand-in for a CUDA stream.

    The stream API is available in the simulator, but all execution occurs
    synchronously, so the synchronization methods do nothing.
    """

    @contextmanager
    def auto_synchronize(self):
        """Context manager that yields once and performs no work."""
        yield

    def synchronize(self):
        """No-op: there is nothing to wait for."""
        return None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Default stream APIs. Since execution from the perspective of the host is
# synchronous in the simulator, these can be the same as the stream class.
# Calling any of them therefore creates a new `stream` instance.
default_stream = stream
legacy_default_stream = stream
per_thread_default_stream = stream
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# External streams cannot be used with the simulator: its implementation does
# not really use streams, so there is nothing meaningful to attach to.
def external_stream(ptr):
    """Always raise `RuntimeError`; *ptr* is ignored."""
    message = "External streams are unsupported in the simulator"
    raise RuntimeError(message)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def synchronize():
    """No-op kept for API compatibility."""
    return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def close():
    """Mark the simulated device list as closed by setting `gpus.closed`."""
    gpus.closed = True
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def declare_device(*args, **kwargs):
    """Accept any arguments and do nothing; returns None."""
    return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def detect():
    """Print a fixed three-line report describing the single simulated device."""
    print("Found 1 CUDA devices")
    device_row = "id %d %20s %40s" % (0, "SIMULATOR", "[SUPPORTED]")
    print(device_row)
    capability_row = "%40s: 5.0" % "compute capability"
    print(capability_row)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def list_devices():
    """Return the `gpus` object imported from `.cudadrv.devices`."""
    return gpus
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_current_device():
    """Return the `device` attribute of the first entry in `gpus`."""
    first = gpus[0]
    return first.device
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Events
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class Event(object):
    """
    Simulator stand-in for a CUDA event.

    The event API is supported, but no timing information is recorded and
    all simulation is synchronous.
    """

    def record(self, stream=0):
        """No-op; *stream* is ignored."""
        return None

    def wait(self, stream=0):
        """No-op; *stream* is ignored."""
        return None

    def synchronize(self):
        """No-op."""
        return None

    def elapsed_time(self, event):
        """This is here to preserve the API; the output is meaningless."""
        placeholder = -1.0
        return placeholder
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Lower-case alias for the `Event` class.
event = Event
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def jit(
    func_or_sig=None,
    device=False,
    debug=None,
    argtypes=None,
    inline=False,
    restype=None,
    fastmath=False,
    link=None,
    boundscheck=None,
    opt=None,
    cache=None,
):
    """Simulator version of the ``jit`` decorator.

    Wraps a Python function in a `FakeCUDAKernel`.  It can be applied
    directly to a function, or called first with a signature, a list, or no
    positional argument, in which case it returns a decorator.

    Only ``device``, ``fastmath`` and ``debug`` are forwarded to
    `FakeCUDAKernel`.  ``argtypes``, ``inline``, ``restype``, ``opt`` and
    ``cache`` are accepted and not used.

    Raises
    ------
    NotImplementedError
        If ``boundscheck`` is truthy or ``link`` is not None.
    """
    # Here for API compatibility
    if boundscheck:
        raise NotImplementedError("bounds checking is not supported for CUDA")

    if link is not None:
        raise NotImplementedError("Cannot link PTX in the simulator")

    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug

    def jitwrapper(fn):
        return FakeCUDAKernel(
            fn, device=device, fastmath=fastmath, debug=debug
        )

    # Check for first argument specifying types - in that case the
    # decorator is not being passed a function
    if (
        func_or_sig is None
        or is_signature(func_or_sig)
        or isinstance(func_or_sig, list)
    ):
        return jitwrapper

    # Direct application: use the same wrapper so that `fastmath` is forwarded
    # here as well (it was previously dropped on this path).
    return jitwrapper(func_or_sig)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@contextmanager
def defer_cleanup():
    """Context manager that yields once; it has no effect in the simulator."""
    yield
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def is_supported_version():
    """Always return True."""
    return True
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# cache hints operations
# The names are bound to None in this module.
# NOTE(review): presumably they exist only so the names can be imported when
# the simulator is active -- confirm.
ldca = None
ldcg = None
ldcs = None
ldlu = None
ldcv = None

stcg = None
stcs = None
stwb = None
stwt = None
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
The compiler is not implemented in the simulator. This module provides a stub
|
|
6
|
+
to allow tests to import successfully.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Placeholders for the compiler entry points. The compiler is not implemented
# in the simulator, so every name is bound to None.
compile = None
compile_for_current_device = None
compile_ptx = None
compile_ptx_for_current_device = None
declare_device_function = None
compile_all = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def run_frontend(func):
    """Stub: ignores *func* and returns None."""
    return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DefaultPassBuilder(object):
    """Stub pass builder whose static methods do nothing and return None."""

    @staticmethod
    def define_nopython_lowering_pipeline(state, name="nopython_lowering"):
        """Stub: both arguments are ignored."""
        return None

    @staticmethod
    def define_typed_pipeline(state, name="typed"):
        """Stub: both arguments are ignored."""
        return None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CompilerBase:
    """Stub compiler base whose constructor accepts and discards its arguments."""

    def __init__(
        self, typingctx, targetctx, library, args, return_type, flags, locals
    ):
        # Nothing is stored on the instance.
        return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Placeholder: no pass manager is implemented in this stub module.
PassManager = None
|