numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
import operator
|
|
6
|
+
import heapq
|
|
7
|
+
from collections import namedtuple
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from functools import cached_property
|
|
11
|
+
|
|
12
|
+
from numba.cuda import config
|
|
13
|
+
|
|
14
|
+
import llvmlite.binding as llvm
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RecordLLVMPassTimings:
    """Context manager that records LLVM pass timings for the enclosed code.

    Entering the context switches LLVM pass timing on.  Leaving it stores
    the report returned by LLVM on the instance and switches pass timing off
    again.  The stored report is exposed through :meth:`get`.
    """

    # ``_data`` holds the raw report; it is only ever assigned in
    # ``__exit__``, so it does not exist before the context has been left.
    __slots__ = ["_data"]

    def __enter__(self):
        """Switch LLVM pass timing on and return this recorder."""
        llvm.set_time_passes(True)
        return self

    def __exit__(self, exc_val, exc_type, exc_tb):
        """Save the timing report, then switch LLVM pass timing off.

        The exception arguments are not inspected and ``None`` is returned,
        so an exception raised inside the ``with`` body keeps propagating.
        """
        # NOTE: the ``with`` statement passes (type, value, traceback)
        # positionally, so ``exc_val`` actually receives the exception type
        # and ``exc_type`` the exception instance.  Neither is used here.
        report = llvm.report_and_reset_timings()
        self._data = report
        llvm.set_time_passes(False)

    def get(self):
        """Wrap the recorded report for further processing.

        Returns
        -------
        timings: ProcessedPassTimings
        """
        raw_report = self._data
        return ProcessedPassTimings(raw_report)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# One row of the pass-timing report.  Each of the four ``*_time`` fields is
# paired with a ``*_percent`` field; ``pass_name`` and ``instruction`` close
# the record.  Field order is part of the interface (positional construction).
PassTimingRecord = namedtuple(
    "PassTimingRecord",
    (
        # user
        "user_time",
        "user_percent",
        # system
        "system_time",
        "system_percent",
        # user + system
        "user_system_time",
        "user_system_percent",
        # wall clock
        "wall_time",
        "wall_percent",
        # identification of the row
        "pass_name",
        "instruction",
    ),
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _adjust_timings(records):
    """Recompute the ``*_time`` fields of every record from its percentages.

    The timing values in the report are truncated, so each time is replaced
    by ``total_time * percent * 0.01``, where ``total_time`` is the matching
    field of the last record (which must be the "Total" row).

    Parameters
    ----------
    records: Sequence[PassTimingRecord]
        The parsed records; the last one must have ``pass_name == "Total"``.

    Returns
    -------
    res: List[PassTimingRecord]
        New records with adjusted ``user``, ``system``, ``user_system`` and
        ``wall`` times; every other field is carried over unchanged.
    """
    totals = records[-1]
    assert totals.pass_name == "Total"  # guard for implementation error

    kinds = ("user", "system", "user_system", "wall")

    adjusted = []
    for rec in records:
        fields = rec._asdict()
        for kind in kinds:
            time_key = f"{kind}_time"
            # Totals are always read from the untouched "Total" record.
            total_time = getattr(totals, time_key)
            fields[time_key] = total_time * fields[f"{kind}_percent"] * 0.01
        # Rebuild the immutable record from the updated mapping.
        adjusted.append(PassTimingRecord(**fields))
    return adjusted
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ProcessedPassTimings:
    """Lazily processed view of a raw LLVM pass-timing report.

    The raw report text is parsed only when the records are first requested,
    so no time is wasted processing timing information nobody reads.
    """

    def __init__(self, raw_data):
        """Store the raw report; parsing is deferred.

        Parameters
        ----------
        raw_data: str
            The raw timing report text.
        """
        self._raw_data = raw_data

    def __bool__(self):
        """Return True when the raw report is non-empty."""
        return bool(self._raw_data)

    def get_raw_data(self):
        """Returns the raw string data.

        Returns
        -------
        res: str
        """
        return self._raw_data

    def get_total_time(self):
        """Compute the total time spent in all passes.

        The value is the wall time of the last processed record.

        Returns
        -------
        res: float
        """
        return self.list_records()[-1].wall_time

    def list_records(self):
        """Get the processed data for the timing report.

        Returns
        -------
        res: List[PassTimingRecord]
        """
        return self._processed

    def list_top(self, n):
        """Returns the top(n) most time-consuming (by wall-time) passes.

        Parameters
        ----------
        n: int
            This limits the maximum number of items to show.
            This function will show the ``n`` most time-consuming passes.

        Returns
        -------
        res: List[PassTimingRecord]
            Returns the top(n) most time-consuming passes in descending order.
        """
        records = self.list_records()
        key = operator.attrgetter("wall_time")
        # The final record is left out of the ranking; ``get_total_time()``
        # reads that same record as the overall total.
        return heapq.nlargest(n, records[:-1], key)

    def summary(self, topn=5, indent=0):
        """Return a string summarizing the timing information.

        Parameters
        ----------
        topn: int; optional
            This limits the maximum number of items to show.
            This function will show the ``topn`` most time-consuming passes.
        indent: int; optional
            Set the indentation level. Defaults to 0 for no indentation.

        Returns
        -------
        res: str
        """
        buf = []
        prefix = " " * indent

        def ap(arg):
            # Every emitted line carries the same indentation prefix.
            buf.append(f"{prefix}{arg}")

        ap(f"Total {self.get_total_time():.4f}s")
        ap("Top timings:")
        for p in self.list_top(topn):
            ap(f" {p.wall_time:.4f}s ({p.wall_percent:5}%) {p.pass_name}")
        return "\n".join(buf)

    @cached_property
    def _processed(self):
        """A cached property for lazily processing the data and returning it.

        See ``_process()`` for details.
        """
        return self._process()

    def _process(self):
        """Parses the raw string data from LLVM timing report and attempts
        to improve the data by recomputing the times
        (See ``_adjust_timings()``).

        Raises
        ------
        ValueError
            If the report contains no column-header line, or if text remains
            after the parser has finished.
        """

        def parse(raw_data):
            """A generator that parses the raw_data line-by-line to extract
            timing information for each pass.
            """
            lines = raw_data.splitlines()
            colheader = r"[a-zA-Z+ ]+"
            # Take at least one column header.
            multicolheaders = rf"(?:\s*-+{colheader}-+)+"

            line_iter = iter(lines)
            # Map the header text used in the report to attribute prefixes.
            header_map = {
                "User Time": "user",
                "System Time": "system",
                "User+System": "user_system",
                "Wall Time": "wall",
                "Instr": "instruction",
                "Name": "pass_name",
            }
            # Find the column headers; everything before them is skipped.
            for ln in line_iter:
                if re.match(multicolheaders, ln):
                    # Get all the column headers
                    raw_headers = re.findall(r"[a-zA-Z][a-zA-Z+ ]+", ln)
                    headers = [header_map[k.strip()] for k in raw_headers]
                    break
            else:
                # Without this guard a report lacking a header line failed
                # further down with an UnboundLocalError on ``headers``.
                raise ValueError(
                    "no column headers found in the LLVM pass timing report"
                )

            assert headers[-1] == "pass_name"
            # Compute the list of available attributes from the column
            # headers while building the regex that matches one data row.
            attrs = []
            number = r"\s*((?:[0-9]+\.)?[0-9]+)"
            pat = ""
            for k in headers[:-1]:
                if k == "instruction":
                    # Captured by the pattern but given no attribute name.
                    # NOTE(review): the positional pairing below assumes this
                    # column follows all timing columns - confirm.
                    pat += number
                else:
                    attrs.append(f"{k}_time")
                    attrs.append(f"{k}_percent")
                    # Either "<time> (<percent>%)" or a run of dashes; in the
                    # latter case both groups are None and become 0.0 below.
                    pat += rf"\s+(?:{number}\s*\({number}%\)|-+)"

            # Put default value 0.0 to all missing attributes.
            missing = {}
            for k in PassTimingRecord._fields:
                if k not in attrs and k != "pass_name":
                    missing[k] = 0.0
            # Parse timings; the trailing group captures the pass name.
            pat += r"\s*(.*)"
            for ln in line_iter:
                m = re.match(pat, ln)
                if m is not None:
                    groups = list(m.groups())
                    data = {
                        k: float(v) if v is not None else 0.0
                        for k, v in zip(attrs, groups)
                    }
                    data.update(missing)
                    pass_name = groups[-1]
                    rec = PassTimingRecord(
                        pass_name=pass_name,
                        **data,
                    )
                    yield rec
                    if rec.pass_name == "Total":
                        # "Total" means the report has ended
                        break
            # Check that we have reached the end of the report
            remaining = "\n".join(line_iter)
            if remaining:
                raise ValueError(
                    f"unexpected text after parser finished:\n{remaining}"
                )

        # Parse raw data
        records = list(parse(self._raw_data))
        return _adjust_timings(records)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# Pairs a name with its timings; instances are created by
# ``PassTimingsCollection._append``.
NamedTimings = namedtuple("NamedTimings", ["name", "timings"])
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class PassTimingsCollection(Sequence):
    """A collection of pass timings.

    This class implements the ``Sequence`` protocol for accessing the
    individual timing records.
    """

    def __init__(self, name):
        """Create an empty collection.

        Parameters
        ----------
        name: str
            Name shown in the header line of ``summary()``.
        """
        self._name = name
        self._records = []

    @contextmanager
    def record(self, name):
        """Record new timings and append to this collection.

        Note: this is mainly for internal use inside the compiler pipeline.

        See also ``RecordLLVMPassTimings``

        Parameters
        ----------
        name: str
            Name for the records.
        """
        if config.LLVM_PASS_TIMINGS:
            # Recording of pass timings is enabled
            with RecordLLVMPassTimings() as timings:
                yield
            rec = timings.get()
            # Only keep non-empty records
            if rec:
                self._append(name, rec)
        else:
            # Do nothing. Recording of pass timings is disabled.
            yield

    def _append(self, name, timings):
        """Append timing records

        Parameters
        ----------
        name: str
            Name for the records.
        timings: ProcessedPassTimings
            the timing records.
        """
        self._records.append(NamedTimings(name, timings))

    def get_total_time(self):
        """Computes the sum of the total time across all contained timings.

        Returns
        -------
        res: float or None
            Returns the total number of seconds or None if no timings were
            recorded
        """
        if self._records:
            return sum(r.timings.get_total_time() for r in self._records)
        else:
            return None

    def list_longest_first(self):
        """Returns the timings in descending order of total time duration.

        Returns
        -------
        res: List[NamedTimings]
            The stored ``(name, timings)`` entries, longest first.
        """
        return sorted(
            self._records,
            key=lambda x: x.timings.get_total_time(),
            reverse=True,
        )

    @property
    def is_empty(self):
        """True when no timings have been recorded."""
        return not self._records

    def summary(self, topn=5):
        """Return a string representing the summary of the timings.

        Parameters
        ----------
        topn: int; optional, default=5.
            This limits the maximum number of items to show.
            This function will show the ``topn`` most time-consuming passes.

        Returns
        -------
        res: str

        See also ``ProcessedPassTimings.summary()``
        """
        if self.is_empty:
            return "No pass timings were recorded"
        else:
            buf = []
            ap = buf.append
            ap(f"Printing pass timings for {self._name}")
            overall_time = self.get_total_time()
            ap(f"Total time: {overall_time:.4f}")
            for i, r in enumerate(self._records):
                ap(f"== #{i} {r.name}")
                record_time = r.timings.get_total_time()
                # A total of zero would otherwise raise ZeroDivisionError;
                # report 0% for every entry in that case.
                if overall_time:
                    percent = record_time / overall_time * 100
                else:
                    percent = 0.0
                ap(f" Percent: {percent:.1f}%")
                ap(r.timings.summary(topn=topn, indent=1))
            return "\n".join(buf)

    def __getitem__(self, i):
        """Get the i-th timing record.

        Returns
        -------
        res: (name, timings)
            A named tuple with two fields:

            - name: str
            - timings: ProcessedPassTimings
        """
        return self._records[i]

    def __len__(self):
        """Length of this collection."""
        return len(self._records)

    def __str__(self):
        """Same text as ``summary()`` with its default arguments."""
        return self.summary()
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class prange:
    """1D parallel iterator producing a sequence of integers.

    Outside of a parallel context, prange behaves exactly like range:
    constructing it simply returns a ``range`` built from the same arguments.
    """

    def __new__(cls, *args):
        # Hand back a plain range, so no prange instance is ever created.
        return range(*args)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _gdb_python_call_gen(func_name, *args):
    """Generate a compiled function that calls ``numba.<func_name>``.

    This builds a call to a function containing a compiled-in gdb command,
    which makes ``numba.gdb*`` work in the interpreter.

    Parameters
    ----------
    func_name: str
        Name of the attribute to look up on the ``numba`` module.
    *args
        Arguments for the call; each one is embedded in the generated
        source as a string literal.

    Returns
    -------
    The result of ``numba.njit`` applied to the generated zero-argument
    function.
    """
    import numba

    fn = getattr(numba, func_name)
    # repr() of the stringified argument is a correctly escaped literal, so
    # quotes, backslashes or newlines inside a gdb command can no longer
    # corrupt the generated source.
    argstr = ",".join(repr(str(arg)) for arg in args)
    defn = f"def _gdb_func_injection():\n\t{func_name}({argstr})\n"
    namespace = {}
    # The generated source comes from the caller's own arguments and only
    # sees ``fn`` under the name ``func_name``.
    exec(defn, {func_name: fn}, namespace)
    return numba.njit(namespace["_gdb_func_injection"])
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def gdb(*args):
    """
    Invoke gdb and attach it to the current process at the call site.

    The arguments are strings in the gdb command language syntax; gdb
    executes them once initialisation has occurred.
    """
    launcher = _gdb_python_call_gen("gdb", *args)
    launcher()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def gdb_breakpoint():
    """
    Inject a breakpoint at the call site.

    The breakpoint is recognised by both `gdb` and `gdb_init`, which allows
    breaking at multiple points. gdb will stop in the user defined code just
    after the frame employed by the breakpoint returns.
    """
    trigger = _gdb_python_call_gen("gdb_breakpoint")
    trigger()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def gdb_init(*args):
    """
    Invoke gdb, attach it to the current process at the call site, then
    continue executing the process under gdb's control.

    The arguments are strings in the gdb command language syntax; gdb
    executes them once initialisation has occurred.
    """
    launcher = _gdb_python_call_gen("gdb_init", *args)
    launcher()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def literally(obj):
    """Force Numba to interpret *obj* as a Literal value.

    *obj* must be either a literal or an argument of the caller function,
    where the argument must be bound to a literal. The literal requirement
    propagates up the call stack.

    The compiler intercepts this function and alters the compilation
    behavior so that the corresponding function parameters are wrapped as
    ``Literal``. It has **no effect** outside of nopython-mode (interpreter,
    and objectmode).

    The current implementation detects literal arguments in two ways:

    1. A compiler pass scans for uses of ``literally``.
    2. ``literally`` is overloaded to raise ``numba.errors.ForceLiteralArg``
       to signal the dispatcher to treat the corresponding parameter
       differently. This mode is to support indirect use (via a function
       call).

    In terms of execution semantics this function is an identity function.

    See :ghfile:`numba/tests/test_literal_dispatch.py` for examples.
    """
    # Plain-Python behaviour: hand the argument straight back.
    return obj
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def literal_unroll(container):
    """Return *container* unchanged.

    When executed as plain Python this is an identity function.
    """
    return container
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
__all__ = [
|
|
88
|
+
"prange",
|
|
89
|
+
"gdb",
|
|
90
|
+
"gdb_breakpoint",
|
|
91
|
+
"gdb_init",
|
|
92
|
+
"literally",
|
|
93
|
+
"literal_unroll",
|
|
94
|
+
]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import functools
|
|
5
|
+
|
|
6
|
+
from llvmlite import ir
|
|
7
|
+
|
|
8
|
+
from numba.cuda.datamodel.registry import DataModelManager, register
|
|
9
|
+
from numba.cuda.datamodel import PrimitiveModel
|
|
10
|
+
from numba.cuda.datamodel.models import StructModel
|
|
11
|
+
from numba.cuda.extending import core_models as models
|
|
12
|
+
from numba.cuda import types
|
|
13
|
+
from numba.cuda.types.ext_types import Dim3, GridGroup, CUDADispatcher, Bfloat16
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Data model manager instance that the models in this module are registered
# against.
cuda_data_manager = DataModelManager()
|
|
17
|
+
|
|
18
|
+
# ``register`` with ``cuda_data_manager`` pre-bound as its first argument;
# used below as ``@register_model(<type>)``.
register_model = functools.partial(register, cuda_data_manager)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_model(Dim3)
class Dim3Model(StructModel):
    """Struct data model for ``Dim3`` with ``int32`` members x, y and z."""

    def __init__(self, dmm, fe_type):
        # One int32 field per axis, in x, y, z order.
        fields = [(axis, types.int32) for axis in ("x", "y", "z")]
        super().__init__(dmm, fe_type, fields)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@register_model(GridGroup)
class GridGroupModel(models.PrimitiveModel):
    """Primitive data model that represents ``GridGroup`` as a 64-bit integer."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, ir.IntType(64))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@register_model(types.Float)
class FloatModel(models.PrimitiveModel):
    """Primitive data model for ``types.Float``.

    ``float16`` is represented as a 16-bit integer, ``float32`` as an LLVM
    float and ``float64`` as an LLVM double. Any other type raises
    ``NotImplementedError``.
    """

    def __init__(self, dmm, fe_type):
        # Checked in order; the backend type is only built for the match.
        candidates = (
            (types.float16, lambda: ir.IntType(16)),
            (types.float32, ir.FloatType),
            (types.float64, ir.DoubleType),
        )
        for known_type, make_backend in candidates:
            if fe_type == known_type:
                backend = make_backend()
                break
        else:
            raise NotImplementedError(fe_type)
        super().__init__(dmm, fe_type, backend)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Register ``models.OpaqueModel`` as the data model for ``CUDADispatcher``.
register_model(CUDADispatcher)(models.OpaqueModel)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@register_model(Bfloat16)
class _model___nv_bfloat16(PrimitiveModel):
    """Primitive data model that represents ``Bfloat16`` as a 16-bit integer."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, ir.IntType(16))
|