numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,889 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from collections import namedtuple
|
|
5
|
+
from numba.cuda.tests.support import override_config, captured_stdout
|
|
6
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
7
|
+
from numba import cuda
|
|
8
|
+
from numba.cuda import types
|
|
9
|
+
from numba.cuda.testing import CUDATestCase
|
|
10
|
+
from numba.cuda.core import config
|
|
11
|
+
from textwrap import dedent
|
|
12
|
+
import math
|
|
13
|
+
import itertools
|
|
14
|
+
import re
|
|
15
|
+
import unittest
|
|
16
|
+
import warnings
|
|
17
|
+
from numba.cuda.core.errors import NumbaDebugInfoWarning
|
|
18
|
+
from numba.cuda.tests.support import ignore_internal_warnings
|
|
19
|
+
import numpy as np
|
|
20
|
+
import inspect
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@skip_on_cudasim("Simulator does not produce debug dumps")
|
|
24
|
+
class TestCudaDebugInfo(CUDATestCase):
|
|
25
|
+
"""
|
|
26
|
+
These tests only check the compiled PTX for a debuginfo section
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def _getasm(self, fn, sig):
|
|
30
|
+
fn.compile(sig)
|
|
31
|
+
return fn.inspect_asm(sig)
|
|
32
|
+
|
|
33
|
+
def _check(self, fn, sig, expect):
|
|
34
|
+
asm = self._getasm(fn, sig=sig)
|
|
35
|
+
re_section_dbginfo = re.compile(r"\.section\s+\.debug_info\s+{")
|
|
36
|
+
match = re_section_dbginfo.search(asm)
|
|
37
|
+
assertfn = self.assertIsNotNone if expect else self.assertIsNone
|
|
38
|
+
assertfn(match, msg=asm)
|
|
39
|
+
|
|
40
|
+
def test_no_debuginfo_in_asm(self):
    """With ``debug=False`` the assembly must have no debug_info section."""

    @cuda.jit(debug=False, opt=False)
    def foo(x):
        x[0] = 1

    self._check(foo, sig=(types.int32[:],), expect=False)
|
|
46
|
+
|
|
47
|
+
def test_debuginfo_in_asm(self):
    """With ``debug=True`` the assembly must have a debug_info section."""

    @cuda.jit(debug=True, opt=False)
    def foo(x):
        x[0] = 1

    self._check(foo, sig=(types.int32[:],), expect=True)
|
|
53
|
+
|
|
54
|
+
def test_environment_override(self):
    """Check ``CUDA_DEBUGINFO_DEFAULT`` and an explicit ``debug=False``.

    With the config value overridden to 1, a function jitted without a
    ``debug`` argument is expected to have a debug_info section, while one
    jitted with ``debug=False`` is expected not to.
    """
    with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
        # Using default value
        @cuda.jit(opt=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(types.int32[:],), expect=True)

        # User override default value
        @cuda.jit(debug=False)
        def bar(x):
            x[0] = 1

        self._check(bar, sig=(types.int32[:],), expect=False)
|
|
69
|
+
|
|
70
|
+
def test_issue_5835(self):
    """Eagerly compile with debug on and optimization off.

    The test has no assertions; it passes when the decorated definition
    below completes without crashing.
    """

    # Invalid debug metadata would segfault NVVM when any function was
    # compiled with debug turned on and optimization off. This eager
    # compilation should not crash anything.
    @cuda.jit((types.int32[::1],), debug=True, opt=False)
    def f(x):
        x[0] = 0
|
|
77
|
+
|
|
78
|
+
def test_issue_9888(self):
    """No ``DILocalVariable`` may have a name starting with ``bool``."""
    # Compiler created symbol should not be emitted in DILocalVariable
    # See Numba Issue #9888 https://github.com/numba/numba/pull/9888
    sig = (types.boolean,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(cond):
        if cond:
            x = 1  # noqa: F841
        else:
            x = 0  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)
    # A variable name starting with "bool" in the debug metadata
    pat = r"!DILocalVariable\(.*name:\s+\"bool"
    match = re.search(pat, llvm_ir)
    self.assertIsNone(match, msg=llvm_ir)
|
|
95
|
+
|
|
96
|
+
def test_bool_type(self):
    """The local ``z = x == y`` must be typed as ``DW_ATE_boolean``.

    The metadata node id is read from the ``type`` field of the
    ``DILocalVariable`` named ``z``; that node must then be a
    ``DIBasicType`` carrying the ``DW_ATE_boolean`` encoding.
    """
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x == y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # extract the metadata node id from `type` field of DILocalVariable
    pat = r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)'
    match = re.search(pat, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    mdnode_id = match.group(1)

    # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
    pat = rf"!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean"
    match = re.search(pat, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
|
|
115
|
+
|
|
116
|
+
def test_grid_group_type(self):
    # A variable holding the result of cuda.cg.this_grid() must be described
    # by an unsigned, 64-bit DIBasicType named "GridGroup".
    sig = (types.int32,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(x):
        grid = cuda.cg.this_grid()  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    grid_group_type = re.search(
        r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64',
        llvm_ir,
    )
    self.assertIsNotNone(grid_group_type, msg=llvm_ir)
|
|
128
|
+
|
|
129
|
+
@unittest.skip("Wrappers no longer exist")
def test_wrapper_has_debuginfo(self):
    # Currently skipped. When enabled, checks that the single kernel
    # definition line in the IR carries a !dbg attachment.
    sig = (types.int32[::1],)

    @cuda.jit(sig, debug=True, opt=0)
    def f(x):
        x[0] = 1

    llvm_ir = f.inspect_llvm(sig)

    # Collect every IR line that defines a cudapy-mangled void function.
    definition_marker = 'define void @"_ZN6cudapy'
    defines = []
    for ir_line in llvm_ir.splitlines():
        if definition_marker in ir_line:
            defines.append(ir_line)

    # Exactly one definition is expected.
    self.assertEqual(len(defines), 1)

    self.assertIn("!dbg", defines[0])
|
|
150
|
+
|
|
151
|
+
def test_debug_function_calls_internal_impl(self):
    # A call into a function whose module is generated from an
    # implementation internal to Numba means several modules must be
    # compiled with NVVM: the internal implementation and the caller.
    # Two modules are involved here because `in (2, 3)` is implemented by:
    #
    #   numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(
    #       UniTuple<long long, 2>,
    #       int
    #   )
    #
    # Condensed from the reproducer in Issue 5311:
    # https://github.com/numba/numba/issues/5311#issuecomment-674206587
    signature = (types.int32[:], types.int32[:])

    @cuda.jit(signature, debug=True, opt=False)
    def f(inp, outp):
        outp[0] = 1 if inp[0] in (2, 3) else 3
|
|
168
|
+
|
|
169
|
+
def test_debug_function_calls_device_function(self):
    # A kernel calling a device function needs multiple modules compiled
    # with NVVM (one for the caller, one for the callee). This only checks
    # that such a compilation does not raise an NVVM error.
    debug_flags = {"debug": True, "opt": 0}

    @cuda.jit(device=True, **debug_flags)
    def threadid():
        return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x

    @cuda.jit((types.int32[:],), **debug_flags)
    def kernel(arr):
        i = cuda.grid(1)
        if i < len(arr):
            arr[i] = threadid()
|
|
183
|
+
|
|
184
|
+
def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):
    # Build the call chain kernel -> f1 -> f2, where each function gets its
    # own debug flag, then launch the kernel once on a single thread.
    kernel_signature = (types.int32, types.int32)

    @cuda.jit(device=True, debug=f2_debug, opt=False)
    def f2(x):
        return x + 1

    @cuda.jit(device=True, debug=f1_debug, opt=False)
    def f1(x, y):
        return x - f2(y)

    @cuda.jit(kernel_signature, debug=kernel_debug, opt=False)
    def kernel(x, y):
        f1(x, y)

    configured = kernel[1, 1]
    configured(1, 2)
|
|
198
|
+
|
|
199
|
+
def test_chained_device_function(self):
    # A kernel calling a device function that itself calls another device
    # function should succeed regardless of which jit decorators have
    # debug=True. See Issue #7159.
    choices = (True, False)

    for flags in itertools.product(choices, choices, choices):
        kernel_debug, f1_debug, f2_debug = flags
        with self.subTest(
            kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
        ):
            self._test_chained_device_function(*flags)
|
|
213
|
+
|
|
214
|
+
def _test_chained_device_function_two_calls(
    self, kernel_debug, f1_debug, f2_debug
):
    # Like the single-chain helper, except the kernel also calls the leaf
    # function f2 directly, and the kernel is compiled lazily at launch
    # (no signature is passed to the decorator).
    @cuda.jit(device=True, debug=f2_debug, opt=False)
    def f2(x):
        return x + 1

    @cuda.jit(device=True, debug=f1_debug, opt=False)
    def f1(x, y):
        return x - f2(y)

    @cuda.jit(debug=kernel_debug, opt=False)
    def kernel(x, y):
        f1(x, y)
        f2(x)

    configured = kernel[1, 1]
    configured(1, 2)
|
|
231
|
+
|
|
232
|
+
def test_chained_device_function_two_calls(self):
    # A kernel that calls a device function which calls a leaf device
    # function, and that also calls the leaf directly, should succeed
    # regardless of which jit decorators have debug=True. See Issue #7159.
    for flags in itertools.product((True, False), repeat=3):
        kernel_debug, f1_debug, f2_debug = flags
        with self.subTest(
            kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
        ):
            self._test_chained_device_function_two_calls(*flags)
|
|
247
|
+
|
|
248
|
+
def test_chained_device_three_functions(self):
    # Same idea as test_chained_device_function, but with three device
    # functions so that the recursion fixing function linkage for debug has
    # to walk all the way down the call tree.
    def three_device_fns(kernel_debug, leaf_debug):
        @cuda.jit(device=True, debug=leaf_debug, opt=False)
        def f3(x):
            return x * x

        @cuda.jit(device=True)
        def f2(x):
            return f3(x) + 1

        @cuda.jit(device=True)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit(debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)

        kernel[1, 1](1, 2)

    # Cover debug on the kernel, on the leaf, on both, and on neither.
    combinations = (
        (True, True),
        (True, False),
        (False, True),
        (False, False),
    )
    for kernel_flag, leaf_flag in combinations:
        three_device_fns(kernel_debug=kernel_flag, leaf_debug=leaf_flag)
|
|
276
|
+
|
|
277
|
+
def _test_kernel_args_types(self):
    # Follow the metadata chain DISubroutineType -> types tuple -> the two
    # entries, and check that each entry is a signed DIBasicType "int32".
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x + y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Node id stored in the `types` field of the DISubroutineType.
    subroutine = re.search(r"!DISubroutineType\(types:\s+!(\d+)\)", llvm_ir)
    self.assertIsNotNone(subroutine, msg=llvm_ir)
    types_node = subroutine.group(1)

    # Node ids listed inside that two-element types tuple.
    type_list = re.search(
        rf"!{types_node}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}", llvm_ir
    )
    self.assertIsNotNone(type_list, msg=llvm_ir)

    # Each listed node must describe the expected basic type.
    for arg_node in type_list.groups():
        arg_type = re.search(
            rf'!{arg_node}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"',  # noqa: E501
            llvm_ir,
        )
        self.assertIsNotNone(arg_type, msg=llvm_ir)
|
|
306
|
+
|
|
307
|
+
def test_kernel_args_types(self):
    # Run the shared argument-type checks under the default configuration.
    self._test_kernel_args_types()
|
|
309
|
+
|
|
310
|
+
def test_kernel_args_types_dump(self):
    # Run the shared argument-type checks with DUMP_LLVM overridden to 1,
    # capturing stdout while they run. See issue #135.
    with override_config("DUMP_LLVM", 1), captured_stdout():
        self._test_kernel_args_types()
|
|
315
|
+
|
|
316
|
+
def test_kernel_args_names(self):
    # The kernel argument must appear in the IR under its plain name "x",
    # not prefixed with "arg.".
    sig = (types.int32,)

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    plain_name = re.search(r"define void @.*\(i32 %\"x\"\)", llvm_ir)
    self.assertIsNotNone(plain_name, msg=llvm_ir)

    prefixed_name = re.search(r"define void @.*\(i32 %\"arg\.x\"\)", llvm_ir)
    self.assertIsNone(prefixed_name, msg=llvm_ir)
|
|
332
|
+
|
|
333
|
+
def test_llvm_dbg_value(self):
    # Calls to llvm.dbg.declare are expected to be replaced by calls to
    # llvm.dbg.value: the former must be absent and the latter present.
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z1 = x  # noqa: F841
        z2 = 100  # noqa: F841
        z3 = y  # noqa: F841
        z4 = True  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    declare_call = re.search(r'call void @"llvm.dbg.declare"', llvm_ir)
    self.assertIsNone(declare_call, msg=llvm_ir)

    value_call = re.search(r'call void @"llvm.dbg.value"', llvm_ir)
    self.assertIsNotNone(value_call, msg=llvm_ir)
|
|
351
|
+
|
|
352
|
+
def test_llvm_dbg_value_range(self):
    # The kernel's docstring holds the FileCheck directives that the
    # generated IR is matched against, so it must stay exactly as written.
    sig = (types.int64,)

    @cuda.jit("void(int64,)", debug=True, opt=False)
    def foo(x):
        """
        CHECK: store i1 true, i1* %"second.1"
        CHECK: call void @"llvm.dbg.value"
        CHECK: store i1 true, i1* %"second.2"
        CHECK: call void @"llvm.dbg.value"

        CHECK: %[[VAL_1:.*]] = load i1, i1* %"second.2"
        CHECK: %[[VAL_2:.*]] = load i1, i1* %[[VAL_3:.*]]
        CHECK: store i1 %[[VAL_1]], i1* %[[VAL_3]]
        CHECK: call void @"llvm.dbg.value"(metadata i1 %[[VAL_1]], metadata ![[VAL_4:[0-9]+]]

        CHECK: ![[VAL_4]] = !DILocalVariable{{.+}}name: "second"
        """
        if x > 0:
            second = x > 10
        else:
            second = True
        if second:
            pass

    ir_by_signature = foo.inspect_llvm()
    self.assertFileCheckMatches(ir_by_signature[sig], foo.__doc__)
|
|
379
|
+
|
|
380
|
+
def test_no_user_var_alias(self):
    # Reassigning "z" must not produce a DILocalVariable named "z$1".
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x  # noqa: F841
        z = y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    aliased_variable = re.search(r'!DILocalVariable.*name:\s+"z\$1".*', llvm_ir)
    self.assertIsNone(aliased_variable, msg=llvm_ir)
|
|
392
|
+
|
|
393
|
+
def test_no_literal_type(self):
    # No DIBasicType in the debug metadata may have a name starting with
    # "Literal", even though "z" is assigned literal values.
    sig = (types.int32,)

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841
        z = 100  # noqa: F841
        z = True  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    literal_type = re.search(r'!DIBasicType.*name:\s+"Literal.*', llvm_ir)
    self.assertIsNone(literal_type, msg=llvm_ir)
|
|
406
|
+
|
|
407
|
+
@unittest.skipIf(
    config.CUDA_DEBUG_POLY, "Uses old union format, not variant_part"
)
def test_union_poly_types(self):
    # A variable assigned values of several types is expected to be
    # described as a 64-bit DW_TAG_union_type with one member per type.
    # The test walks the metadata chain step by step:
    #   DILocalVariable "foo" -> union DICompositeType -> elements tuple
    #   -> three DIDerivedType members.
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        foo = 100  # noqa: F841
        foo = 2.34  # noqa: F841
        foo = True  # noqa: F841
        foo = 200  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Extract the type node id of the variable.
    variable_pat = r'!DILocalVariable\(.*name: "foo".*type: !(\d+)\)'
    match = re.search(variable_pat, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    union_id = match.group(1)

    # Verify the union type and extract the elements node id.
    union_pat = rf"!{union_id} = distinct !DICompositeType\(elements: !(\d+),.*size: 64, tag: DW_TAG_union_type\)"  # noqa: E501
    match = re.search(union_pat, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    elements_id = match.group(1)

    # Extract the member node ids. The pattern is anchored to the elements
    # node found above, so an unrelated three-element tuple elsewhere in
    # the module cannot be picked up by accident.
    elements_pat = rf"!{elements_id} = !{{ !(\d+), !(\d+), !(\d+) }}"
    match = re.search(elements_pat, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    member_ids = match.groups()

    # Verify the member nodes. Parentheses are escaped so that they match
    # the literal "(" and ")" of the metadata node instead of forming a
    # regex group.
    expected_members = (("_bool", 8), ("_float64", 64), ("_int64", 64))
    for member_id, (name, size) in zip(member_ids, expected_members):
        member_pat = rf'!{member_id} = !DIDerivedType\(.*name: "{name}", size: {size}, tag: DW_TAG_member\)'  # noqa: E501
        match = re.search(member_pat, llvm_ir)
        self.assertIsNotNone(match, msg=llvm_ir)
|
|
448
|
+
|
|
449
|
+
def test_union_debug(self):
    # Launch a debug kernel in which "foo" is rebound several times, then
    # check the printed host copy of the results array.
    @cuda.jit("void(u8, int64[::1])", debug=True, opt=False)
    def a_union_use_case(arg, results):
        foo = 1
        foo = arg
        if foo < 1:
            foo = 2
            return
        bar = foo == 0
        results[0] = 1 if not bar else 0

    with captured_stdout() as out:
        host_zeros = np.zeros(16, dtype=np.int64)
        results = cuda.to_device(host_zeros)
        a_union_use_case[1, 1](100, results)
        print(results.copy_to_host())
        # With arg == 100 only element 0 is written, and it is set to 1.
        expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
        printed = out.getvalue()
        self.assertIn(expected, printed)
|
|
466
|
+
|
|
467
|
+
@unittest.skipUnless(config.CUDA_DEBUG_POLY, "CUDA_DEBUG_POLY not enabled")
def test_poly_variant_part(self):
    """Test polymorphic variables with DW_TAG_variant_part.

    With CUDA_DEBUG_POLY enabled, a variable assigned values of several
    types is expected to be described by a wrapper structure holding a
    discriminator member and a variant part with one member per type.
    """
    # The extraData field is either a typed constant (i8 0, i8 1, ...) or
    # a node reference (!123, !456), depending on the configuration.
    extradata_pattern = (
        "i8 {{[0-9]+}}"
        if config.CUDA_DEBUG_POLY_USE_TYPED_CONST
        else "{{![0-9]+}}"
    )

    @cuda.jit("void()", debug=True, opt=False)
    def f():
        foo = 100  # noqa: F841
        foo = 3.14  # noqa: F841
        foo = True  # noqa: F841
        foo = np.int32(42)  # noqa: F841

    no_args = tuple()
    llvm_ir = f.inspect_llvm()[no_args]

    # FileCheck template: node ids are captured and reused to verify the
    # hierarchical structure; EXTRADATA is substituted below.
    check_template = """
    CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
    CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
    CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
    CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator-{{[0-9]+}}"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
    CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
    CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
    """
    check_pattern = check_template.replace("EXTRADATA", extradata_pattern)

    self.assertFileCheckMatches(llvm_ir, check_pattern)
|
|
506
|
+
|
|
507
|
+
def test_DW_LANG(self):
    # The kernel's docstring holds the FileCheck directives for the
    # DICompileUnit fields (emission kind, language, producer, ...).
    @cuda.jit(debug=True, opt=False)
    def foo():
        """
        CHECK: distinct !DICompileUnit
        CHECK-SAME: emissionKind: FullDebug
        CHECK-SAME: isOptimized: true
        CHECK-SAME: language: DW_LANG_C_plus_plus
        CHECK-SAME: producer: "clang (Numba)"
        """
        pass

    # No signature was given to the decorator, so launch once to compile.
    foo[1, 1]()

    no_args = tuple()
    ir_by_signature = foo.inspect_llvm()
    self.assertFileCheckMatches(ir_by_signature[no_args], foo.__doc__)
|
|
523
|
+
|
|
524
|
+
def test_DILocation(self):
    """Tests that DILocation information is reasonable.

    The IR produced for the kernel `foo` is shaped like:
     define function() {
     entry:
       alloca
       store 0 to alloca
       <arithmetic for doing the operations on b, c, d>
       setup for print
       branch
     other_labels:
     ... <elided>
     }

    The FileCheck directives in the kernel docstring verify that:
    * neither the allocas nor their initializing stores carry !dbg
    * the arithmetic appears in source order, each with a !dbg
    * each of those !dbg nodes is defined as a !DILocation, in the same
      order as the instructions that reference them
    """
    sig = (types.float64,)

    @cuda.jit(sig, debug=True, opt=False)
    def foo(a):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: entry:

        CHECK: %[[VAL_0:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_0]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_1:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_1]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_2:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_2]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_3:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_3]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_4:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_4]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_5:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_5]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_6:.*]] = alloca i8*
        CHECK-NOT: !dbg
        CHECK: store i8* null, i8** %[[VAL_6]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_7:.*]] = alloca i8*
        CHECK-NOT: !dbg
        CHECK: store i8* null, i8** %[[VAL_7]]
        CHECK-NOT: !dbg

        CHECK: br label %"[[ENTRY:.+]]"
        CHECK-NOT: !dbg
        CHECK: [[ENTRY]]:

        CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
        CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
        CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]

        CHECK: ![[DBGADD]] = !DILocation
        CHECK: ![[DBGMUL]] = !DILocation
        CHECK: ![[DBGDIV]] = !DILocation
        """
        b = a + 1.23
        c = b * 2.34
        a = b / c

    ir_by_signature = foo.inspect_llvm()
    self.assertFileCheckMatches(ir_by_signature[sig], foo.__doc__)
|
|
604
|
+
|
|
605
|
+
def test_missing_source(self):
    # A function created through exec() has no source file. Compiling it
    # with debug=True should emit exactly one NumbaDebugInfoWarning that
    # names the offending function.
    strsrc = """
    def foo():
        pass
    """
    namespace = {}
    exec(dedent(strsrc), {}, namespace)
    foo = cuda.jit(debug=True, opt=False)(namespace["foo"])

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", NumbaDebugInfoWarning)
        ignore_internal_warnings()
        foo[1, 1]()

    self.assertEqual(len(caught), 1)
    warning = caught[0]
    self.assertEqual(warning.category, NumbaDebugInfoWarning)

    text = str(warning.message)
    # The warning must carry the expected message ...
    self.assertIn("Could not find source for function", text)
    # ... and mention the function it is about.
    self.assertIn(str(foo.py_func), text)
|
|
627
|
+
|
|
628
|
+
def test_no_if_op_bools_declared(self):
    # Variables used as the condition of an `if` should not be declared in
    # the debug info. See Numba PR #9888:
    # https://github.com/numba/numba/pull/9888
    #
    # NOTE(review): the docstring on `choice` is kept as-is, but no
    # FileCheck call in this test reads it.
    @cuda.jit(
        "int64(boolean, boolean)",
        debug=True,
        opt=False,
        _dbg_optnone=True,
        device=True,
    )
    def choice(cond1, cond2):
        """
        CHECK: define void @{{.+}}choices
        """
        if cond1 and cond2:
            return 1
        else:
            return 2

    first_signature = choice.signatures[0]
    ir_content = choice.inspect_llvm()[first_signature]

    # Every line mentioning llvm.dbg.declare must be free of "bool".
    declare_lines = [
        ir_line
        for ir_line in ir_content.splitlines()
        if "llvm.dbg.declare" in ir_line
    ]
    for ir_line in declare_lines:
        self.assertNotIn("bool", ir_line)
|
|
652
|
+
|
|
653
|
+
def test_llvm_inliner_flag_conflict(self):
    # bar is marked 'alwaysinline'. With DEBUGINFO_DEFAULT set, functions
    # are not marked 'alwaysinline', which results in a conflict. baz is
    # not marked 'alwaysinline' as a result of DEBUGINFO_DEFAULT.

    @cuda.jit(forceinline=True)
    def bar(x):
        return math.sin(x)

    @cuda.jit(forceinline=False)
    def baz(x):
        return math.cos(x)

    @cuda.jit(opt=True)
    def foo(x, y):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: call i32 @"[[BAR:.+]]"(
        CHECK: call i32 @"[[BAZ:.+]]"(

        CHECK-DAG: declare i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: declare i32 @"[[BAZ]]"(
        CHECK-DAG: define linkonce_odr i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: define linkonce_odr i32 @"[[BAZ]]"(
        """
        a = bar(y)
        b = baz(y)
        x[0] = a + b

    # First make sure it compiles and runs under DEBUGINFO_DEFAULT.
    with override_config("DEBUGINFO_DEFAULT", 1):
        result = cuda.device_array(1, dtype=np.float32)
        foo[1, 1](result, np.pi)
        result.copy_to_host()

    result_host = math.sin(np.pi) + math.cos(np.pi)
    self.assertPreciseEqual(result[0], result_host)

    first_signature = foo.signatures[0]
    ir_content = foo.inspect_llvm()[first_signature]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # The device functions must call the matching device math function
    # and carry the expected attributes.
    bar_checks = """
    CHECK: define linkonce_odr i32 @{{.+}}bar
    CHECK-SAME: alwaysinline
    CHECK-NEXT: {
    CHECK-NEXT: {{.*}}:
    CHECK-NEXT: br label {{.*}}
    CHECK-NEXT: {{.*}}:
    CHECK-NEXT: call double @"__nv_sin"
    CHECK-NEXT: store double {{.*}}, double* {{.*}}
    CHECK-NEXT: ret i32 0
    CHECK-NEXT: }
    """
    self.assertFileCheckMatches(ir_content, bar_checks)

    baz_checks = """
    CHECK: define linkonce_odr i32 @{{.+}}baz
    CHECK-NOT: alwaysinline
    CHECK-NEXT: {
    CHECK-NEXT: {{.*}}:
    CHECK-NEXT: br label {{.*}}
    CHECK-NEXT: {{.*}}:
    CHECK-NEXT: call double @"__nv_cos"
    CHECK-NEXT: store double {{.*}}, double* {{.*}}
    CHECK-NEXT: ret i32 0
    CHECK-NEXT: }
    """
    self.assertFileCheckMatches(ir_content, baz_checks)
|
|
728
|
+
|
|
729
|
+
def test_DILocation_versioned_variables(self):
    """Tests that DILocation information for versions of variables matches
    up to their definition site.

    The kernel assigns `c` in both branches of an `if`. The test checks
    that the conditional branch and the stores of 5 and 1 each reference a
    single debug-info node, and that the `line:` of those nodes equals the
    Python source line of `if n:`, `c = 5` and `c = 1` respectively.
    """

    @cuda.jit(debug=True, opt=False)
    def foo(dest, n):
        """
        CHECK: define void @{{.+}}foo
        CHECK: store i64 5, i64* %"c{{.+}} !dbg ![[STORE5:.+]]
        CHECK: store i64 1, i64* %"c{{.+}} !dbg ![[STORE1:.+]]
        CHECK: [[STORE5]] = !DILocation(
        CHECK: [[STORE1]] = !DILocation(
        """
        if n:
            c = 5
        else:
            c = 1
        dest[0] = c

    foo_source_lines, foo_source_lineno = inspect.getsourcelines(
        foo.py_func
    )

    result = cuda.device_array(1, dtype=np.int32)
    foo[1, 1](result, 1)
    # Assert on the host copy instead of discarding it.
    host_result = result.copy_to_host()
    self.assertEqual(host_result[0], 5)

    ir_content = foo.inspect_llvm()[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # Collect the debuginfo metadata lines and the lines of function `foo`.
    lines = ir_content.splitlines()
    debuginfo_equals = re.compile(r"!(\d+) = ")
    debug_info_lines = [
        ir_line for ir_line in lines if debuginfo_equals.search(ir_line)
    ]

    function_start_regex = re.compile(r"define void @.+foo")
    start = next(
        (
            index
            for index, ir_line in enumerate(lines)
            if function_start_regex.search(ir_line)
        ),
        None,
    )
    self.assertIsNotNone(start, msg=ir_content)
    # Look for the closing brace only after the start of `foo`, so that a
    # function defined earlier in the module cannot end the slice early.
    end = next(
        (index for index in range(start, len(lines)) if lines[index] == "}"),
        None,
    )
    self.assertIsNotNone(end, msg=ir_content)
    foo_ir_lines = lines[start:end]

    def dbg_node_of(ir_line):
        # Text following "!dbg" on an instruction line, e.g. "!42".
        return ir_line.split("!dbg")[1].strip()

    def definition_of(node):
        # The single metadata line that defines `node`.
        found = [
            ir_line for ir_line in debug_info_lines if node + " = " in ir_line
        ]
        self.assertEqual(len(found), 1)
        return found[0]

    def shared_dbg_node(store_lines):
        # All given store lines must reference one and the same node.
        nodes = {dbg_node_of(ir_line) for ir_line in store_lines}
        self.assertEqual(len(nodes), 1)
        return nodes.pop()

    # Check the if condition's debuginfo
    cond_branch = [ir_line for ir_line in foo_ir_lines if "br i1" in ir_line]
    self.assertEqual(len(cond_branch), 1)
    self.assertIn("!dbg", cond_branch[0])
    cond_branch_dbginfo = definition_of(dbg_node_of(cond_branch[0]))

    # Check debuginfo for the store instructions
    store_1_lines = [
        ir_line for ir_line in foo_ir_lines if "store i64 1" in ir_line
    ]
    store_5_lines = [
        ir_line for ir_line in foo_ir_lines if "store i64 5" in ir_line
    ]
    self.assertEqual(len(store_1_lines), 2)
    self.assertEqual(len(store_5_lines), 2)

    store_1_dbginfo = definition_of(shared_dbg_node(store_1_lines))
    store_5_dbginfo = definition_of(shared_dbg_node(store_5_lines))

    # Ensure the line numbers match what we expect based on the Python
    # source.
    line_number_regex = re.compile(r"line: (\d+)")
    LineNumbers = namedtuple(
        "LineNumbers", ["cond_branch", "store_5", "store_1"]
    )
    line_number_matches = LineNumbers(
        *(
            line_number_regex.search(dbginfo)
            for dbginfo in (
                cond_branch_dbginfo,
                store_5_dbginfo,
                store_1_dbginfo,
            )
        )
    )
    self.assertTrue(all(found is not None for found in line_number_matches))
    line_numbers = LineNumbers(
        *(int(found.group(1)) for found in line_number_matches)
    )

    # Source order is `if n:`, `c = 5`, `c = 1`, which lines up with the
    # field order of LineNumbers.
    source_line_numbers = LineNumbers(
        *(
            offset + foo_source_lineno
            for offset, text in enumerate(foo_source_lines)
            if "c = " in text or "if n:" in text
        )
    )
    self.assertEqual(line_numbers, source_line_numbers)
|
|
862
|
+
|
|
863
|
+
def test_debuginfo_asm(self):
    # The assembly of a debug build must contain a ".debug" section; the
    # assembly of a non-debug build of the same function must not.
    def foo():
        pass

    debug_kernel = cuda.jit(debug=True, opt=False)(foo)
    debug_kernel[1, 1]()
    debug_asm = debug_kernel.inspect_asm()[debug_kernel.signatures[0]]
    expect_debug_section = """
    CHECK: .section{{.+}}.debug
    """
    self.assertFileCheckMatches(debug_asm, expect_debug_section)

    plain_kernel = cuda.jit(debug=False)(foo)
    plain_kernel[1, 1]()
    plain_asm = plain_kernel.inspect_asm()[plain_kernel.signatures[0]]
    expect_no_debug_section = """
    CHECK-NOT: .section{{.+}}.debug
    """
    self.assertFileCheckMatches(plain_asm, expect_no_debug_section)
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|