numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LinkableCode:
    """Container for in-memory code that is handed to the linker.

    :param data: Buffer holding the data to link.
    :param name: Optional file name to report in any compilation or linking
                 errors. When it is falsy, the ``default_name`` attribute is
                 reported instead; this base class does not define
                 ``default_name`` itself, so it must come from a subclass.
    """

    def __init__(self, data, name=None):
        self._name = name
        self.data = data

    @property
    def name(self):
        """Return the explicit name if one was given, else ``default_name``."""
        explicit = self._name
        if explicit:
            return explicit
        return self.default_name
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PTXSource(LinkableCode):
    """Linkable code holding PTX source text in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-ptx>"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CUSource(LinkableCode):
    """Linkable code holding CUDA C/C++ source text in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-cu>"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Fatbin(LinkableCode):
    """Linkable code holding an ELF Fatbin in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-fatbin>"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class Cubin(LinkableCode):
    """Linkable code holding an ELF Cubin in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-cubin>"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Archive(LinkableCode):
    """Linkable code holding an archive of objects in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-archive>"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Object(LinkableCode):
    """Linkable code holding an object file in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-object>"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class LTOIR(LinkableCode):
    """Linkable code holding an LTOIR file in memory."""

    # Name reported when no explicit name was supplied.
    default_name = "<unnamed-ltoir>"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
NVRTC is not supported in the simulator, but stubs are provided to allow tests
|
|
6
|
+
to import correctly.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def compile(src, name, cc, ltoir=False):
    """Simulator stub for NVRTC compilation; always fails.

    All arguments are accepted only for signature compatibility and are
    ignored.

    :raises RuntimeError: unconditionally.
    """
    message = "NVRTC is not supported in the simulator"
    raise RuntimeError(message)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
NVVM is not supported in the simulator, but stubs are provided to allow tests
|
|
6
|
+
to import correctly.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NvvmSupportError(ImportError):
    """ImportError subclass signalling that NVVM cannot be used."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NVVM:
    """Simulator stand-in for the NVVM interface; it cannot be instantiated."""

    def __init__(self):
        """Always raise NvvmSupportError."""
        message = "NVVM not supported in the simulator"
        raise NvvmSupportError(message)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Placeholders so that these names can be imported from this module. Each is
# bound to None because NVVM is not supported in the simulator.
# NOTE(review): NvvmError is None here, so an `except NvvmError:` clause would
# fail if an exception reached it - confirm no simulator code path relies on it.
CompilationUnit = None
compile_ir = None
set_cuda_kernel = None
get_arch_option = None
LibDevice = None
NvvmError = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def is_available():
    """Report NVVM availability; always False in this simulator stub."""
    return False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_supported_ccs():
    """Return the supported compute capabilities: an empty tuple here."""
    return ()
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
The runtime API is unsupported in the simulator, but some stubs are
|
|
6
|
+
provided to allow tests to import correctly.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FakeRuntime:
    """Stub of the runtime API object that reports a dummy version."""

    def get_version(self):
        """Return the placeholder version tuple ``(-1, -1)``."""
        major, minor = -1, -1
        return (major, minor)

    def is_supported_version(self):
        """Return True; the stub treats its version as supported."""
        return True

    @property
    def supported_versions(self):
        """One-element tuple containing the placeholder version."""
        placeholder = (-1, -1)
        return (placeholder,)


# Module-level instance of the stub runtime.
runtime = FakeRuntime()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class CUDADispatcher:
    """
    Placeholder standing in for the real CUDADispatcher.

    It has no behaviour of its own; it exists so that code importing
    CUDADispatcher does not fail at import time when the simulator is in use.
    """
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
import functools
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from .cudadrv.devicearray import FakeCUDAArray, FakeWithinKernelCUDAArray
|
|
12
|
+
from .kernelapi import Dim3, FakeCUDAModule, swapped_cuda_module
|
|
13
|
+
from ..errors import normalize_kernel_dimensions
|
|
14
|
+
from ..args import ArgHint, InOut
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Global variable to keep track of the current "kernel context", i.e. the
|
|
19
|
+
FakeCUDAModule. We only support one kernel launch at a time.
|
|
20
|
+
No support for concurrent kernel launch.
|
|
21
|
+
"""
|
|
22
|
+
# Holds the active FakeCUDAModule while a simulated kernel runs, else None.
_kernel_context = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@contextmanager
def _push_kernel_context(mod):
    """
    Install ``mod`` as the current kernel context for the duration of a
    ``with`` block.

    Only one context can be active at a time. The context is cleared again
    when the block exits, whether or not its body raised.

    :param mod: the object to expose as the current kernel context.
    :raises AssertionError: if a kernel context is already installed.
    """
    global _kernel_context
    # An explicit raise (rather than an ``assert`` statement) keeps this guard
    # active under ``python -O``, where asserts are stripped and a nested push
    # would otherwise silently overwrite, then clear, the outer context. The
    # exception type and message match what the assert produced.
    if _kernel_context is not None:
        raise AssertionError("concurrent simulated kernel not supported")
    _kernel_context = mod
    try:
        yield
    finally:
        _kernel_context = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _get_kernel_context():
    """
    Return the kernel context that is currently installed.

    Device functions typically use this to reach the context of the kernel
    that is calling them.
    """
    active_context = _kernel_context
    return active_context
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class FakeOverload:
    """
    Stand-in for an overload; its sole purpose is to expose
    max_cooperative_grid_blocks.
    """

    def max_cooperative_grid_blocks(self, blockdim):
        """
        Return the largest cooperative grid size, in blocks.

        The result is always 1 whatever ``blockdim`` is: the simulator has no
        way to synchronise different blocks with each other, so a cooperative
        grid is limited to a single block.
        """
        max_blocks = 1
        return max_blocks
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class FakeOverloadDict(dict):
    """
    Dictionary whose item lookups succeed for every key.
    """

    def __getitem__(self, key):
        """
        Return a new FakeOverload regardless of ``key``.

        The simulator does not keep track of overloads, so any signature
        maps to a fresh fake one.
        """
        overload = FakeOverload()
        return overload
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class FakeCUDAKernel(object):
    """
    Wraps a @cuda.jit-ed function.

    Indexing the object (``kernel[griddim, blockdim, stream, sharedmem]``)
    stores a launch configuration on it. Calling it then runs the wrapped
    Python function once for every thread of every block, one block at a
    time.
    """

    def __init__(self, fn, device, fastmath=False, extensions=[], debug=False):
        """
        :param fn: the Python function to wrap.
        :param device: truthy if ``fn`` is a device function, i.e. one that
                       is called directly rather than launched over a grid.
        :param fastmath: stored on the instance; not used by this class.
        :param extensions: objects whose ``prepare_args`` method is applied
                           to every launch argument. The default list is
                           never mutated because a copy is taken below.
        :param debug: passed on to each BlockManager created for a launch.
        """
        self.fn = fn
        self._device = device
        self._fastmath = fastmath
        self._debug = debug
        self.extensions = list(extensions)  # defensive copy
        # Initial configuration: grid unconfigured, stream 0, no dynamic shared
        # memory.
        self.grid_dim = None
        self.block_dim = None
        self.stream = 0
        self.dynshared_size = 0
        functools.update_wrapper(self, fn)

    def __call__(self, *args):
        """
        Run the wrapped function.

        A device function, or any call made while a kernel context is already
        active, invokes ``fn`` directly and returns its result. Otherwise the
        call is a kernel launch using the stored configuration, and nothing
        is returned.
        """
        if self._device or _kernel_context:
            with swapped_cuda_module(self.fn, _get_kernel_context()):
                return self.fn(*args)

        # Ensure we've been given a valid grid configuration
        grid_dim, block_dim = normalize_kernel_dimensions(
            self.grid_dim, self.block_dim
        )

        fake_cuda_module = FakeCUDAModule(
            grid_dim, block_dim, self.dynshared_size
        )
        with _push_kernel_context(fake_cuda_module):
            # Callbacks collected while preparing the arguments; they are
            # invoked after all blocks have run.
            retr = []

            def fake_arg(arg):
                # Substitute numpy arrays with FakeCUDAArrays, because they
                # implement some semantics differently.

                # Map the argument through every registered extension; each
                # prepare_args call receives and returns a (type, value) pair.
                _, arg = functools.reduce(
                    lambda ty_val, extension: extension.prepare_args(
                        *ty_val, stream=0, retr=retr
                    ),
                    self.extensions,
                    (None, arg),
                )

                if isinstance(arg, np.ndarray) and arg.ndim > 0:
                    ret = InOut(arg).to_device(retr)
                elif isinstance(arg, ArgHint):
                    ret = arg.to_device(retr)
                elif isinstance(arg, np.void):
                    ret = FakeCUDAArray(arg)  # In case a np record comes in.
                else:
                    ret = arg
                if isinstance(ret, FakeCUDAArray):
                    return FakeWithinKernelCUDAArray(ret)
                return ret

            fake_args = [fake_arg(arg) for arg in args]
            with swapped_cuda_module(self.fn, fake_cuda_module):
                # Execute one block at a time
                for grid_point in np.ndindex(*grid_dim):
                    bm = BlockManager(self.fn, grid_dim, block_dim, self._debug)
                    bm.run(grid_point, *fake_args)

            for wb in retr:
                wb()

    def __getitem__(self, configuration):
        """
        Store a launch configuration and return this same object.

        :param configuration: sequence of ``griddim, blockdim`` optionally
                              followed by ``stream`` and ``sharedmem``.
        """
        self.grid_dim, self.block_dim = normalize_kernel_dimensions(
            *configuration[:2]
        )

        # Stream and dynamic shared memory are optional. Because this object
        # is reconfigured in place, reset them to their defaults when they
        # are omitted so that values from an earlier configuration do not
        # leak into this one.
        n_items = len(configuration)
        self.stream = configuration[2] if n_items > 2 else 0
        self.dynshared_size = configuration[3] if n_items > 3 else 0

        return self

    def bind(self):
        """
        Do nothing; present for interface compatibility.
        """
        pass

    def specialize(self, *args):
        """
        Return this same object; the arguments are ignored.
        """
        return self

    def forall(self, ntasks, tpb=0, stream=0, sharedmem=0):
        """
        Configure a one-dimensional launch of ``ntasks`` blocks of one thread.

        ``tpb`` is accepted but ignored.

        :raises ValueError: if ``ntasks`` is negative.
        """
        if ntasks < 0:
            raise ValueError(
                "Can't create ForAll with negative task count: %s" % ntasks
            )
        return self[ntasks, 1, stream, sharedmem]

    @property
    def overloads(self):
        """
        A new FakeOverloadDict on every access.
        """
        return FakeOverloadDict()

    @property
    def py_func(self):
        """
        The wrapped Python function.
        """
        return self.fn
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Thread emulation
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class BlockThread(threading.Thread):
    """
    Manages the execution of a function for a single CUDA thread.
    """

    def __init__(self, f, manager, blockIdx, threadIdx, debug):
        """
        :param f: callable run by this thread.
        :param manager: the owning BlockManager; its ``_block_dim`` and
                        ``block_state`` attributes are used.
        :param blockIdx: index of the block, unpacked into a Dim3.
        :param threadIdx: index of the thread in its block, unpacked into a
                          Dim3.
        :param debug: if truthy, numpy is set to raise on division errors
                      before ``f`` runs.
        """
        if debug:

            def debug_wrapper(*args, **kwargs):
                np.seterr(divide="raise")
                f(*args, **kwargs)

            target = debug_wrapper
        else:
            target = f

        super().__init__(target=target)
        self.syncthreads_event = threading.Event()
        self.syncthreads_blocked = False
        self._manager = manager
        self.blockIdx = Dim3(*blockIdx)
        self.threadIdx = Dim3(*threadIdx)
        # Set by run() to an (exception, traceback) pair if the target fails.
        self.exception = None
        self.daemon = True
        self.abort = False
        self.debug = debug
        # Linear index of this thread within its block, x varying fastest.
        blockDim = Dim3(*self._manager._block_dim)
        self.thread_id = self.threadIdx.x + (
            blockDim.x * (self.threadIdx.y + blockDim.y * self.threadIdx.z)
        )

    def run(self):
        """
        Run the target, recording any exception instead of propagating it.

        On failure ``self.exception`` is set to ``(exception, traceback)``.
        The exception is a new instance of the original type whose message
        is prefixed with this thread's ``tid`` and ``ctaid``.
        """
        try:
            super().run()
        except Exception as e:
            tid = "tid=%s" % list(self.threadIdx)
            ctaid = "ctaid=%s" % list(self.blockIdx)
            if str(e) == "":
                msg = "%s %s" % (tid, ctaid)
            else:
                msg = "%s %s: %s" % (tid, ctaid, e)
            tb = sys.exc_info()[2]
            # Using `with_traceback` here would cause it to be mutated by
            # future raise statements, which may or may not matter.
            try:
                annotated = type(e)(msg)
            except Exception:
                # Some exception types cannot be built from a single message
                # (e.g. UnicodeDecodeError). Keep the original exception in
                # that case: losing the thread-index prefix is better than
                # failing here and never recording the error at all.
                annotated = e
            self.exception = (annotated, tb)

    def syncthreads(self):
        """
        Block until the manager sets this thread's syncthreads event.

        :raises RuntimeError: if the abort flag is set on entry or after
                              waking up.
        """
        if self.abort:
            raise RuntimeError("abort flag set on syncthreads call")

        self.syncthreads_blocked = True
        self.syncthreads_event.wait()
        self.syncthreads_event.clear()

        if self.abort:
            raise RuntimeError("abort flag set on syncthreads clear")

    def _block_reduce(self, value, reduction):
        # Store this thread's value in the shared block_state array, wait for
        # the other threads, apply `reduction` to the whole array, then wait
        # again so that no thread overwrites its slot before all have read.
        idx = self.threadIdx.x, self.threadIdx.y, self.threadIdx.z
        self._manager.block_state[idx] = value
        self.syncthreads()
        result = reduction(self._manager.block_state)
        self.syncthreads()
        return result

    def syncthreads_count(self, value):
        """
        Synchronise and return how many threads of the block passed a
        non-zero ``value``.
        """
        return self._block_reduce(value, np.count_nonzero)

    def syncthreads_and(self, value):
        """
        Synchronise and return 1 if every thread of the block passed a truthy
        ``value``, else 0.
        """
        return 1 if self._block_reduce(value, np.all) else 0

    def syncthreads_or(self, value):
        """
        Synchronise and return 1 if any thread of the block passed a truthy
        ``value``, else 0.
        """
        return 1 if self._block_reduce(value, np.any) else 0

    def __str__(self):
        return "Thread <<<%s, %s>>>" % (self.blockIdx, self.threadIdx)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class BlockManager(object):
    """
    Runs all the threads of one thread block.

    run() starts every thread. A thread executes until it reaches
    syncthreads(), where it marks itself by setting syncthreads_blocked to
    True and then waits on its syncthreads_event.

    Meanwhile the BlockManager polls the threads. Every thread found blocked
    in syncthreads() is added to the set of blocked threads. Once all live
    threads are blocked they are all released: syncthreads_blocked is set
    back to False and syncthreads_event is set.

    Polling goes on until no thread is alive any more, at which point
    execution of the block is complete.
    """

    def __init__(self, f, grid_dim, block_dim, debug):
        self._f = f
        self._grid_dim = grid_dim
        self._block_dim = block_dim
        self._debug = debug
        # One boolean slot per thread of the block.
        self.block_state = np.zeros(block_dim, dtype=np.bool_)

    def run(self, grid_point, *args):
        """
        Execute the function once per thread of the block at ``grid_point``.

        The first exception recorded by a thread is re-raised here with that
        thread's traceback.
        """

        # The same entry point serves every thread; it does not depend on
        # the thread's position in the block.
        def target():
            self._f(*args)

        # Create all threads
        threads = set()
        for block_point in np.ndindex(*self._block_dim):
            thread = BlockThread(
                target, self, grid_point, block_point, self._debug
            )
            thread.start()
            threads.add(thread)
        livethreads = set(threads)
        blockedthreads = set()

        # Potential optimisations:
        # 1. Continue the while loop immediately after finding a blocked thread
        # 2. Don't poll already-blocked threads
        while livethreads:
            for thread in livethreads:
                if thread.syncthreads_blocked:
                    blockedthreads.add(thread)
                elif thread.exception:
                    # Abort all other simulator threads on exception,
                    # do *not* join immediately to facilitate debugging.
                    for other in threads:
                        other.abort = True
                        other.syncthreads_blocked = False
                        other.syncthreads_event.set()

                    error, traceback = thread.exception
                    raise error.with_traceback(traceback)
            if livethreads == blockedthreads:
                # Every live thread has reached the barrier: release them.
                for thread in blockedthreads:
                    thread.syncthreads_blocked = False
                    thread.syncthreads_event.set()
                blockedthreads = set()
            livethreads = {t for t in livethreads if t.is_alive()}
        # Final check for exceptions in case any were set prior to thread
        # finishing, before we could check it
        for thread in threads:
            if thread.exception:
                error, traceback = thread.exception
                raise error.with_traceback(traceback)
|