numba-cuda 0.22.1__cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-311-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-311-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-311-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-311-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-311-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +9 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +360 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.1.dist-info/METADATA +109 -0
- numba_cuda-0.22.1.dist-info/RECORD +488 -0
- numba_cuda-0.22.1.dist-info/WHEEL +6 -0
- numba_cuda-0.22.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import operator
|
|
5
|
+
|
|
6
|
+
from numba.cuda import types, typing
|
|
7
|
+
from numba.cuda import cgutils
|
|
8
|
+
|
|
9
|
+
from numba.cuda.core.imputils import Registry, impl_ret_untracked
|
|
10
|
+
|
|
11
|
+
# Registry that collects every lowering implementation defined in this
# module under the name "optional".
registry = Registry("optional")

# Module-level shorthands for the registry's registration entry points;
# they are used below both as decorators and as plain calls.
lower_builtin = registry.lower
lower_cast = registry.lower_cast
lower_getattr_generic = registry.lower_getattr_generic
lower_setattr_generic = registry.lower_setattr_generic
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def always_return_true_impl(context, builder, sig, args):
    """
    Lowering implementation that ignores all of its arguments and
    returns the constant ``cgutils.true_bit``.
    """
    return cgutils.true_bit
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def always_return_false_impl(context, builder, sig, args):
    """
    Lowering implementation that ignores all of its arguments and
    returns the constant ``cgutils.false_bit``.
    """
    # NOTE(review): not registered anywhere in the visible part of this
    # module; presumably kept for use by other modules - confirm.
    return cgutils.false_bit
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def optional_is_none(context, builder, sig, args):
    """
    Lower ``<Optional> is None`` / ``None is <Optional>``.

    The result is the negation of the Optional operand's ``valid`` flag,
    i.e. it is true exactly when the Optional holds no value.

    ``sig.args`` and ``args`` must each contain exactly two entries; the
    operand whose type equals ``types.none`` may be on either side.
    """
    first_ty, second_ty = sig.args
    first_val, second_val = args

    # Select the operand that is not the literal None: when the first
    # operand is None, the Optional is the second one.
    if first_ty == types.none:
        opt_ty, opt_val = second_ty, second_val
    else:
        opt_ty, opt_val = first_ty, first_val

    helper = context.make_helper(builder, opt_ty, opt_val)
    has_value = cgutils.as_bool_bit(builder, helper.valid)
    is_none = builder.not_(has_value)
    return impl_ret_untracked(context, builder, sig.return_type, is_none)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Register the lowering of ``operator.is_`` for every operand pairing
# handled by this module, in this order:
#   None is None      -> constant true
#   Optional is None  -> test of the Optional's valid flag
#   None is Optional  -> same test, operands swapped
for _lhs_ty, _rhs_ty, _impl in (
    (types.none, types.none, always_return_true_impl),
    (types.Optional, types.none, optional_is_none),
    (types.none, types.Optional, optional_is_none),
):
    lower_builtin(operator.is_, _lhs_ty, _rhs_ty)(_impl)

# Do not leave the loop variables behind in the module namespace.
del _lhs_ty, _rhs_ty, _impl
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@lower_getattr_generic(types.Optional)
def optional_getattr(context, builder, typ, value, attr):
    """
    Generic attribute read on an Optional: forward to the wrapped type.

    The Optional value is first cast to its wrapped type (``typ.type``),
    then the getattr implementation registered for that wrapped type is
    looked up and invoked on the cast value.
    """
    wrapped_ty = typ.type
    unwrapped = context.cast(builder, value, typ, wrapped_ty)
    getter = context.get_getattr(wrapped_ty, attr)
    return getter(context, builder, wrapped_ty, unwrapped, attr)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@lower_setattr_generic(types.Optional)
def optional_setattr(context, builder, sig, args, attr):
    """
    Generic attribute write on an Optional: forward to the wrapped type.

    ``sig.args`` is ``(optional type, value type)`` and ``args`` is
    ``(optional target, value)``. The target is cast to the wrapped type,
    a signature with the wrapped type in place of the Optional is built,
    and the setattr implementation for that signature is invoked.
    """
    opt_ty, value_ty = sig.args
    opt_target, new_value = args

    wrapped_ty = opt_ty.type
    unwrapped_target = context.cast(builder, opt_target, opt_ty, wrapped_ty)

    # Same signature as the incoming one, except that the receiver is
    # the wrapped type rather than the Optional.
    inner_sig = typing.signature(sig.return_type, wrapped_ty, value_ty)
    setter = context.get_setattr(attr, inner_sig)
    return setter(builder, (unwrapped_target, new_value))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@lower_cast(types.Optional, types.Optional)
def optional_to_optional(context, builder, fromty, toty, val):
    """
    Cast an Optional of one wrapped type to an Optional of another.

    Optional-to-Optional casts are special-cased so that None propagates
    correctly. Given types T and U, casting T? to U? (``?`` denotes
    Optional) should always succeed: if the source value is None, the
    resulting U? is None as well; otherwise the wrapped value is cast
    from T to U. This differs from casting T? to U, which requires that
    the source value is not None.
    """
    src = context.make_helper(builder, fromty, value=val)
    src_has_value = cgutils.as_bool_bit(builder, src.valid)

    # The destination helper is created without an initial value; both
    # branches below fill in its ``valid`` and ``data`` fields.
    dst = context.make_helper(builder, toty)

    with builder.if_else(src_has_value) as (when_valid, when_none):
        with when_valid:
            # Source holds a value: mark the result valid and cast the
            # wrapped value to the destination's wrapped type.
            dst.valid = cgutils.true_bit
            dst.data = context.cast(
                builder, src.data, fromty.type, toty.type
            )

        with when_none:
            # Source is None: mark the result invalid and store a null
            # value of the payload's type.
            dst.valid = cgutils.false_bit
            payload_ty = dst.data.type
            dst.data = cgutils.get_null_value(payload_ty)

    return dst._getvalue()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@lower_cast(types.Any, types.Optional)
def any_to_optional(context, builder, fromty, toty, val):
    """
    Cast an arbitrary value to an Optional.

    A source of type ``types.none`` becomes an Optional none of the wrapped
    type; any other source is first cast to the wrapped type and then
    wrapped as an Optional value.
    """
    wrapped_ty = toty.type
    if fromty == types.none:
        return context.make_optional_none(builder, wrapped_ty)

    # Non-None source: convert to the wrapped type, then wrap it.
    converted = context.cast(builder, val, fromty, wrapped_ty)
    return context.make_optional_value(builder, wrapped_ty, converted)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@lower_cast(types.Optional, types.Any)
@lower_cast(types.Optional, types.Boolean)
def optional_to_any(context, builder, fromty, toty, val):
    """
    Cast an Optional to a non-Optional type.

    Code is emitted that returns a user ``TypeError`` through the calling
    convention when the Optional's valid flag is not set; the wrapped value
    is then cast from the wrapped type to ``toty``.
    """
    wrapped = context.make_helper(builder, fromty, value=val)
    is_set = cgutils.as_bool_bit(builder, wrapped.valid)
    is_none = builder.not_(is_set)
    # The None case is hinted as unlikely.
    with builder.if_then(is_none, likely=False):
        expected_ty = fromty.type
        msg = "expected %s, got None" % (expected_ty,)
        context.call_conv.return_user_exc(builder, TypeError, (msg,))

    return context.cast(builder, wrapped.data, fromty.type, toty)
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Defines CUDA Options for use in the CUDA target
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABCMeta, abstractmethod
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AbstractOptionValue(metaclass=ABCMeta):
    """Common base for custom option values.

    Concrete subclasses implement :meth:`encode`; the default ``repr`` is
    derived from it.
    """

    @abstractmethod
    def encode(self) -> str:
        """Return a string encoding of the option's values."""
        ...

    def __repr__(self) -> str:
        # Rendered as ``ClassName(<encoded values>)``.
        cls_name = self.__class__.__name__
        encoded = self.encode()
        return "{}({})".format(cls_name, encoded)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FastMathOptions(AbstractOptionValue):
    """
    Options for controlling fast math optimization.

    The enabled flags are kept in ``self.flags`` as a set of flag-name
    strings. The instance is truthy when at least one flag is enabled.
    """

    def __init__(self, value):
        """
        Build the flag set from ``value``.

        Parameters
        ----------
        value : FastMathOptions, bool, set or dict
            - ``FastMathOptions``: its flags are copied.
            - ``True``: enables the single flag ``"fast"``.
            - ``False``: no flags.
            - ``set``: the flag names to enable.
            - ``dict``: maps flag name to a truthy/falsy enable value; only
              the names with a truthy value are enabled.

        Raises
        ------
        ValueError
            If a set/dict contains a name that is not a recognized flag, or
            if ``value`` is of any other type.
        """
        # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags
        valid_flags = {
            "fast",
            "nnan",
            "ninf",
            "nsz",
            "arcp",
            "contract",
            "afn",
            "reassoc",
        }

        if isinstance(value, FastMathOptions):
            self.flags = value.flags.copy()
        elif value is True:
            self.flags = {"fast"}
        elif value is False:
            self.flags = set()
        elif isinstance(value, set):
            invalid = value - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            # Store a copy rather than the caller's set, so that the caller
            # mutating its set afterwards cannot change (or invalidate) the
            # flags held here. Mirrors the FastMathOptions branch above.
            self.flags = set(value)
        elif isinstance(value, dict):
            invalid = set(value.keys()) - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            self.flags = {v for v, enable in value.items() if enable}
        else:
            msg = "Expected fastmath option(s) to be either a bool, dict or set"
            raise ValueError(msg)

    def __bool__(self):
        """True when at least one flag is enabled."""
        return bool(self.flags)

    # Python 2 spelling of __bool__, kept as an alias.
    __nonzero__ = __bool__

    def encode(self) -> str:
        """Return ``str()`` of the flag set."""
        return str(self.flags)

    def __eq__(self, other):
        """Equal only to another instance of exactly the same class with the
        same flag set; otherwise defer with ``NotImplemented``."""
        if type(other) is type(self):
            return self.flags == other.flags
        return NotImplemented
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ParallelOptions(AbstractOptionValue):
    """
    Options for controlling auto parallelization.

    Each name listed in ``__slots__`` is stored as an attribute on the
    instance.
    """

    __slots__ = (
        "enabled",
        "comprehension",
        "reduction",
        "inplace_binop",
        "setitem",
        "numpy",
        "stencil",
        "fusion",
        "prange",
    )

    def __init__(self, value):
        """
        Parameters
        ----------
        value : bool, dict or ParallelOptions
            - ``bool``: every option (including ``enabled``) takes this value.
            - ``dict``: ``enabled`` is True and each individual option is
              read from the dict, defaulting to True when absent. The dict
              passed in is left unmodified.
            - ``ParallelOptions``: all options are copied from it.

        Raises
        ------
        NameError
            If a dict contains keys that are not recognized options.
        ValueError
            If ``value`` is of any other type.
        """
        if isinstance(value, bool):
            self.enabled = value
            self.comprehension = value
            self.reduction = value
            self.inplace_binop = value
            self.setitem = value
            self.numpy = value
            self.stencil = value
            self.fusion = value
            self.prange = value
        elif isinstance(value, dict):
            # Pop from a shallow copy: recognized keys are removed one by one
            # so that whatever is left over is unrecognized. Popping from the
            # caller's dict would empty it, and a later construction from the
            # same dict would then silently fall back to all-True defaults.
            remaining = dict(value)
            self.enabled = True
            self.comprehension = remaining.pop("comprehension", True)
            self.reduction = remaining.pop("reduction", True)
            self.inplace_binop = remaining.pop("inplace_binop", True)
            self.setitem = remaining.pop("setitem", True)
            self.numpy = remaining.pop("numpy", True)
            self.stencil = remaining.pop("stencil", True)
            self.fusion = remaining.pop("fusion", True)
            self.prange = remaining.pop("prange", True)
            if remaining:
                msg = "Unrecognized parallel options: %s" % remaining.keys()
                raise NameError(msg)
        elif isinstance(value, ParallelOptions):
            self.enabled = value.enabled
            self.comprehension = value.comprehension
            self.reduction = value.reduction
            self.inplace_binop = value.inplace_binop
            self.setitem = value.setitem
            self.numpy = value.numpy
            self.stencil = value.stencil
            self.fusion = value.fusion
            self.prange = value.prange
        else:
            msg = "Expect parallel option to be either a bool or a dict"
            raise ValueError(msg)

    def _get_values(self):
        """Get values as dictionary."""
        return {k: getattr(self, k) for k in self.__slots__}

    def __eq__(self, other):
        """Equal only to another instance of exactly the same class whose
        option values all match; otherwise defer with ``NotImplemented``."""
        if type(other) is type(self):
            return self._get_values() == other._get_values()
        return NotImplemented

    def encode(self) -> str:
        """Return the options as a comma-separated ``name=value`` list."""
        return ", ".join(f"{k}={v}" for k, v in self._get_values().items())
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class InlineOptions(AbstractOptionValue):
    """
    Options for controlling inlining.

    The wrapped value is either the string ``"always"`` or ``"never"``, or
    an object that has a ``__call__`` attribute (treated as a cost model).
    """

    def __init__(self, value):
        """
        Parameters
        ----------
        value : str or callable
            ``"always"``, ``"never"``, or an object with ``__call__``.

        Raises
        ------
        ValueError
            If ``value`` is any other string or a non-callable object.
        """
        if isinstance(value, str):
            ok = value in ("always", "never")
        else:
            ok = hasattr(value, "__call__")

        if ok:
            self._inline = value
        else:
            # Wrap the operand in a 1-tuple: with a bare ``% value`` a tuple
            # argument is unpacked by the % operator, which either raises
            # TypeError or prints only its single element instead of
            # reporting the offending value in the intended ValueError.
            msg = (
                "kwarg 'inline' must be one of the strings 'always' or "
                "'never', or it can be a callable that returns True/False. "
                "Found value %s" % (value,)
            )
            raise ValueError(msg)

    @property
    def is_never_inline(self):
        """
        True if never inline
        """
        return self._inline == "never"

    @property
    def is_always_inline(self):
        """
        True if always inline
        """
        return self._inline == "always"

    @property
    def has_cost_model(self):
        """
        True if a cost model is provided
        """
        return not (self.is_always_inline or self.is_never_inline)

    @property
    def value(self):
        """
        The raw value
        """
        return self._inline

    def __eq__(self, other):
        """Equal only to another instance of exactly the same class with an
        equal raw value; otherwise defer with ``NotImplemented``."""
        if type(other) is type(self):
            return self.value == other.value
        return NotImplemented

    def encode(self) -> str:
        """Return ``repr()`` of the raw value."""
        return repr(self._inline)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class TargetOptions:
    """Translate user options given to decorators into attributes on the
    ``numba.cuda.core.compiler.Flags`` object used by lowering and the
    target context.

    Subclasses declare the options they understand as class attributes that
    are ``Mapping`` instances; the attribute name is the option name.
    """

    class Mapping:
        """Associates an option with a flag attribute name and a converter
        applied to the option value before it is stored."""

        def __init__(self, flag_name, apply=lambda x: x):
            self.flag_name = flag_name
            self.apply = apply

    def finalize(self, flags, options):
        """Hook for subclasses to apply target specific customizations of
        the default flags. The base implementation does nothing.

        Parameters
        ----------
        flags : Flags
        options : dict
        """

    @classmethod
    def parse_as_flags(cls, flags, options):
        """Apply the target options found in ``options`` to ``flags`` and
        return ``flags``.

        Parameters
        ----------
        flags : Flags
        options : dict
        """
        parser = cls()
        parser._apply(flags, options)
        parser.finalize(flags, options)
        return flags

    def _apply(self, flags, options):
        """Set one flag per recognized option; raise ``KeyError`` when any
        option has no corresponding ``Mapping``."""
        owner = type(self)
        # Collect every class attribute that is a Mapping instance.
        attributes = ((name, getattr(owner, name)) for name in dir(owner))
        known = {
            name: attr
            for name, attr in attributes
            if isinstance(attr, owner.Mapping)
        }

        consumed = set()
        for name, mapping in known.items():
            if name not in options:
                continue
            converted = mapping.apply(options[name])
            setattr(flags, mapping.flag_name, converted)
            consumed.add(name)

        leftover = set(options) - consumed
        if not leftover:
            return
        # Options that no Mapping claimed.
        raise KeyError(
            f"Unrecognized options: {leftover}. "
            f"Known options are {known.keys()}"
        )
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from numba.cuda.core import analysis, ir
|
|
5
|
+
from numba.cuda.core import ir_utils, transforms
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class YieldPoint(object):
    """A yield instruction together with the block that contains it.

    ``live_vars`` and ``weak_live_vars`` start as None; they are filled in
    by ``PostProcessor._compute_generator_info``.
    """

    def __init__(self, block, inst):
        assert isinstance(block, ir.Block)
        assert isinstance(inst, ir.Yield)
        self.block, self.inst = block, inst
        # Not computed yet.
        self.live_vars = self.weak_live_vars = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GeneratorInfo(object):
    """Container for the yield points and state variables of a generator."""

    def __init__(self):
        # Mapping of yield index to its YieldPoint.
        self.yield_points = dict()
        # Variable names, kept in order.
        self.state_vars = list()

    def get_yield_points(self):
        """
        Return an iterable over the stored YieldPoint instances.
        """
        points = self.yield_points
        return points.values()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class VariableLifetime(object):
    """
    Lazily computed variable-lifetime information for a set of blocks.

    Each analysis result is a ``cached_property``: it is computed on first
    access and then reused.
    """

    def __init__(self, blocks):
        self._blocks = blocks

    @cached_property
    def cfg(self):
        """Control-flow graph computed from the blocks."""
        blocks = self._blocks
        return analysis.compute_cfg_from_blocks(blocks)

    @cached_property
    def usedefs(self):
        """Use/def information computed from the blocks."""
        blocks = self._blocks
        return analysis.compute_use_defs(blocks)

    @cached_property
    def livemap(self):
        """Live map derived from the CFG and the use/def maps."""
        graph = self.cfg
        blocks = self._blocks
        use_map = self.usedefs.usemap
        def_map = self.usedefs.defmap
        return analysis.compute_live_map(graph, blocks, use_map, def_map)

    @cached_property
    def deadmaps(self):
        """Dead maps derived from the CFG, the live map and the def map."""
        graph = self.cfg
        blocks = self._blocks
        live_map = self.livemap
        def_map = self.usedefs.defmap
        return analysis.compute_dead_maps(graph, blocks, live_map, def_map)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Registry through which other packages that define new IR node types hook
# into the insertion of dels.
# format: {node type: function}; the function is called with the statement
# and the set of dead variable names, and returns the names it has handled.
ir_extension_insert_dels = dict()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class PostProcessor(object):
    """
    A post-processor for Numba IR.

    Wraps a function IR object and, via :meth:`run`, canonicalizes its CFG,
    attaches variable-lifetime and generator information to it, and
    optionally inserts explicit ``Del`` nodes.
    """

    def __init__(self, func_ir):
        # The IR object that every pass below reads and mutates in place.
        self.func_ir = func_ir

    def run(self, emit_dels: bool = False, extend_lifetimes: bool = False):
        """
        Run the following passes over Numba IR:
        - canonicalize the CFG
        - emit explicit `del` instructions for variables
        - compute lifetime of variables
        - compute generator info (if function is a generator function)

        Parameters
        ----------
        emit_dels : bool
            When true, ``Del`` nodes are inserted as the final step.
        extend_lifetimes : bool
            Forwarded to ``_insert_var_dels``; only used when ``emit_dels``
            is true.
        """
        self.func_ir.blocks = transforms.canonicalize_cfg(self.func_ir.blocks)
        vlt = VariableLifetime(self.func_ir.blocks)
        self.func_ir.variable_lifetime = vlt

        bev = analysis.compute_live_variables(
            vlt.cfg,
            self.func_ir.blocks,
            vlt.usedefs.defmap,
            vlt.deadmaps.combined,
        )
        # Record the computed entry variables for every block, keyed by the
        # block object itself.
        for offset, ir_block in self.func_ir.blocks.items():
            self.func_ir.block_entry_vars[ir_block] = bev[offset]

        if self.func_ir.is_generator:
            self.func_ir.generator_info = GeneratorInfo()
            self._compute_generator_info()
        else:
            self.func_ir.generator_info = None

        # Emit del nodes, do this last as the generator info parsing generates
        # and then strips dels as part of its analysis.
        if emit_dels:
            self._insert_var_dels(extend_lifetimes=extend_lifetimes)

    def _populate_generator_info(self):
        """
        Fill `index` for the Yield instruction and create YieldPoints.

        Indices start at 1 and follow the order in which the yields are
        encountered while iterating the blocks and their bodies.
        """
        dct = self.func_ir.generator_info.yield_points
        assert not dct, "rerunning _populate_generator_info"
        for block in self.func_ir.blocks.values():
            for inst in block.body:
                # Only yields that appear as the value of an assignment are
                # considered.
                if isinstance(inst, ir.Assign):
                    yieldinst = inst.value
                    if isinstance(yieldinst, ir.Yield):
                        index = len(dct) + 1
                        yieldinst.index = index
                        yp = YieldPoint(block, yieldinst)
                        dct[yieldinst.index] = yp

    def _compute_generator_info(self):
        """
        Compute the generator's state variables as the union of live variables
        at all yield points.

        For every yield point this also sets ``live_vars`` and
        ``weak_live_vars`` on the YieldPoint. Del nodes are inserted
        temporarily for the analysis and removed again before returning.
        """
        # generate del info, it's used in analysis here, strip it out at the end
        self._insert_var_dels()
        self._populate_generator_info()
        gi = self.func_ir.generator_info
        for yp in gi.get_yield_points():
            live_vars = set(self.func_ir.get_block_entry_vars(yp.block))
            weak_live_vars = set()
            # A single iterator is shared by the two loops below, so the
            # second loop resumes right after the yield statement.
            stmts = iter(yp.block.body)
            for stmt in stmts:
                if isinstance(stmt, ir.Assign):
                    if stmt.value is yp.inst:
                        break
                    live_vars.add(stmt.target.name)
                elif isinstance(stmt, ir.Del):
                    live_vars.remove(stmt.value)
            else:
                # for-else: reached only when the loop ended without hitting
                # the yield. NOTE(review): `assert` is skipped under -O.
                assert 0, "couldn't find yield point"
            # Try to optimize out any live vars that are deleted immediately
            # after the yield point.
            for stmt in stmts:
                if isinstance(stmt, ir.Del):
                    name = stmt.value
                    if name in live_vars:
                        live_vars.remove(name)
                        weak_live_vars.add(name)
                else:
                    break
            yp.live_vars = live_vars
            yp.weak_live_vars = weak_live_vars

        # State variables: sorted union of live and weak-live variables over
        # all yield points.
        st = set()
        for yp in gi.get_yield_points():
            st |= yp.live_vars
            st |= yp.weak_live_vars
        gi.state_vars = sorted(st)
        self.remove_dels()

    def _insert_var_dels(self, extend_lifetimes=False):
        """
        Insert del statements for each variable.

        The dead maps are taken from ``func_ir.variable_lifetime`` and the
        blocks are patched in place by ``_patch_var_dels``; nothing is
        returned.

        The algorithm avoids relying on explicit knowledge on loops and
        distinguish between variables that are defined locally vs variables that
        come from incoming blocks.
        We start with simple usage (variable reference) and definition (variable
        creation) maps on each block. Propagate the liveness info to predecessor
        blocks until it stabilize, at which point we know which variables must
        exist before entering each block. Then, we compute the end of variable
        lives and insert del statements accordingly. Variables are deleted after
        the last use. Variable referenced by terminators (e.g. conditional
        branch and return) are deleted by the successors or the caller.
        """
        vlt = self.func_ir.variable_lifetime
        self._patch_var_dels(
            vlt.deadmaps.internal,
            vlt.deadmaps.escaping,
            extend_lifetimes=extend_lifetimes,
        )

    def _patch_var_dels(
        self, internal_dead_map, escaping_dead_map, extend_lifetimes=False
    ):
        """
        Insert delete in each block

        Parameters
        ----------
        internal_dead_map :
            Indexed by block offset; each entry is copied and used as a set
            of variable names to delete after their last use in the block.
        escaping_dead_map :
            Indexed by block offset; each entry is iterated for the variable
            names to delete at the start of the block.
        extend_lifetimes : bool
            When true, the dels for internal variables are collected and
            placed together just before the block terminator instead of
            directly after the statement of last use.
        """
        for offset, ir_block in self.func_ir.blocks.items():
            # for each internal var, insert delete after the last use
            internal_dead_set = internal_dead_map[offset].copy()
            delete_pts = []
            # for each statement in reverse order
            # (the last statement, the terminator, is excluded)
            for stmt in reversed(ir_block.body[:-1]):
                # internal vars that are used here
                live_set = set(v.name for v in stmt.list_vars())
                dead_set = live_set & internal_dead_set
                # Give registered extension handlers the chance to take care
                # of dels for their own node types; whatever they report as
                # done is not handled again here.
                for T, def_func in ir_extension_insert_dels.items():
                    if isinstance(stmt, T):
                        done_dels = def_func(stmt, dead_set)
                        dead_set -= done_dels
                        internal_dead_set -= done_dels
                # used here but not afterwards
                delete_pts.append((stmt, dead_set))
                internal_dead_set -= dead_set

            # rewrite body and insert dels
            body = []
            lastloc = ir_block.loc
            del_store = []
            # delete_pts was built back-to-front; reverse it to restore the
            # original statement order.
            for stmt, delete_set in reversed(delete_pts):
                # If using extended lifetimes then the Dels are all put at the
                # block end just ahead of the terminator, so associate their
                # location with the terminator.
                if extend_lifetimes:
                    lastloc = ir_block.body[-1].loc
                else:
                    lastloc = stmt.loc
                # Ignore dels (assuming no user inserted deletes)
                if not isinstance(stmt, ir.Del):
                    body.append(stmt)
                # note: the reverse sort is not necessary for correctness
                # it is just to minimize changes to test for now
                for var_name in sorted(delete_set, reverse=True):
                    delnode = ir.Del(var_name, loc=lastloc)
                    if extend_lifetimes:
                        del_store.append(delnode)
                    else:
                        body.append(delnode)
            if extend_lifetimes:
                body.extend(del_store)
            body.append(ir_block.body[-1])  # terminator
            ir_block.body = body

            # vars to delete at the start
            escape_dead_set = escaping_dead_map[offset]
            for var_name in sorted(escape_dead_set):
                ir_block.prepend(ir.Del(var_name, loc=ir_block.body[0].loc))

    def remove_dels(self):
        """
        Strips the IR of Del nodes
        """
        ir_utils.remove_dels(self.func_ir.blocks)
|