numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import math
|
|
2
5
|
import itertools
|
|
3
6
|
|
|
@@ -34,7 +37,7 @@ from numba.cuda.tests.complex_usecases import (
|
|
|
34
37
|
sinh_usecase,
|
|
35
38
|
tanh_usecase,
|
|
36
39
|
)
|
|
37
|
-
from numba.np import numpy_support
|
|
40
|
+
from numba.cuda.np import numpy_support
|
|
38
41
|
|
|
39
42
|
|
|
40
43
|
def compile_scalar_func(pyfunc, argtypes, restype):
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
|
|
3
6
|
from numba import cuda, complex64, int32, float64
|
|
4
7
|
from numba.cuda.testing import unittest, CUDATestCase
|
|
5
|
-
from numba.core.config import ENABLE_CUDASIM
|
|
8
|
+
from numba.cuda.core.config import ENABLE_CUDASIM
|
|
6
9
|
|
|
7
10
|
CONST_EMPTY = np.array([])
|
|
8
11
|
CONST1D = np.arange(10, dtype=np.float64) / 2.0
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from __future__ import print_function
|
|
2
5
|
|
|
3
6
|
import os
|
|
@@ -6,7 +9,8 @@ import cffi
|
|
|
6
9
|
|
|
7
10
|
import numpy as np
|
|
8
11
|
|
|
9
|
-
from numba import config, cuda, int32
|
|
12
|
+
from numba import cuda, int32
|
|
13
|
+
from numba.cuda import config
|
|
10
14
|
from numba.types import CPointer
|
|
11
15
|
from numba.cuda.testing import (
|
|
12
16
|
unittest,
|
|
@@ -14,7 +18,6 @@ from numba.cuda.testing import (
|
|
|
14
18
|
skip_on_cudasim,
|
|
15
19
|
skip_unless_cc_60,
|
|
16
20
|
skip_if_cudadevrt_missing,
|
|
17
|
-
skip_if_mvc_enabled,
|
|
18
21
|
)
|
|
19
22
|
from numba.core.typing import signature
|
|
20
23
|
|
|
@@ -60,7 +63,6 @@ def sequential_rows(M):
|
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
@skip_if_cudadevrt_missing
|
|
63
|
-
@skip_if_mvc_enabled("CG not supported with MVC")
|
|
64
66
|
class TestCudaCooperativeGroups(CUDATestCase):
|
|
65
67
|
@skip_unless_cc_60
|
|
66
68
|
def test_this_grid(self):
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2017 Intel Corporation
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from numba.core import types, ir, config
|
|
8
|
+
from numba.cuda import compiler
|
|
9
|
+
from numba.cuda.core.annotations import type_annotations
|
|
10
|
+
from numba.cuda.core.ir_utils import (
|
|
11
|
+
copy_propagate,
|
|
12
|
+
apply_copy_propagate,
|
|
13
|
+
get_name_var_table,
|
|
14
|
+
)
|
|
15
|
+
from numba.cuda.core.typed_passes import type_inference_stage
|
|
16
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
17
|
+
import unittest
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _test_will_propagate(b, z, w):
|
|
21
|
+
x = 3
|
|
22
|
+
x1 = x
|
|
23
|
+
if b > 0:
|
|
24
|
+
y = z + w # noqa: F821
|
|
25
|
+
else:
|
|
26
|
+
y = 0 # noqa: F841
|
|
27
|
+
a = 2 * x1
|
|
28
|
+
return a < b
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _test_wont_propagate(b, z, w):
|
|
32
|
+
x = 3
|
|
33
|
+
if b > 0:
|
|
34
|
+
y = z + w # noqa: F841
|
|
35
|
+
x = 1
|
|
36
|
+
else:
|
|
37
|
+
y = 0 # noqa: F841
|
|
38
|
+
a = 2 * x
|
|
39
|
+
return a < b
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _in_list_var(list_var, var):
|
|
43
|
+
for i in list_var:
|
|
44
|
+
if i.name == var:
|
|
45
|
+
return True
|
|
46
|
+
return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _find_assign(func_ir, var):
|
|
50
|
+
for label, block in func_ir.blocks.items():
|
|
51
|
+
for i, inst in enumerate(block.body):
|
|
52
|
+
if isinstance(inst, ir.Assign) and inst.target.name != var:
|
|
53
|
+
all_var = inst.list_vars()
|
|
54
|
+
if _in_list_var(all_var, var):
|
|
55
|
+
return True
|
|
56
|
+
|
|
57
|
+
return False
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@skip_on_cudasim("cudasim doesn't support run_frontend")
|
|
61
|
+
class TestCopyPropagate(CUDATestCase):
|
|
62
|
+
def test1(self):
|
|
63
|
+
from numba.cuda.descriptor import cuda_target
|
|
64
|
+
|
|
65
|
+
typingctx = cuda_target.typing_context
|
|
66
|
+
targetctx = cuda_target.target_context
|
|
67
|
+
test_ir = compiler.run_frontend(_test_will_propagate)
|
|
68
|
+
typingctx.refresh()
|
|
69
|
+
targetctx.refresh()
|
|
70
|
+
args = (types.int64, types.int64, types.int64)
|
|
71
|
+
typemap, return_type, calltypes, _ = type_inference_stage(
|
|
72
|
+
typingctx, targetctx, test_ir, args, None
|
|
73
|
+
)
|
|
74
|
+
_ = type_annotations.TypeAnnotation(
|
|
75
|
+
func_ir=test_ir,
|
|
76
|
+
typemap=typemap,
|
|
77
|
+
calltypes=calltypes,
|
|
78
|
+
lifted=(),
|
|
79
|
+
lifted_from=None,
|
|
80
|
+
args=args,
|
|
81
|
+
return_type=return_type,
|
|
82
|
+
html_output=config.HTML,
|
|
83
|
+
)
|
|
84
|
+
in_cps, out_cps = copy_propagate(test_ir.blocks, typemap)
|
|
85
|
+
_ = apply_copy_propagate(
|
|
86
|
+
test_ir.blocks,
|
|
87
|
+
in_cps,
|
|
88
|
+
get_name_var_table(test_ir.blocks),
|
|
89
|
+
typemap,
|
|
90
|
+
calltypes,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
self.assertFalse(_find_assign(test_ir, "x1"))
|
|
94
|
+
|
|
95
|
+
def test2(self):
|
|
96
|
+
from numba.cuda.descriptor import cuda_target
|
|
97
|
+
|
|
98
|
+
typingctx = cuda_target.typing_context
|
|
99
|
+
targetctx = cuda_target.target_context
|
|
100
|
+
test_ir = compiler.run_frontend(_test_wont_propagate)
|
|
101
|
+
typingctx.refresh()
|
|
102
|
+
targetctx.refresh()
|
|
103
|
+
args = (types.int64, types.int64, types.int64)
|
|
104
|
+
typemap, return_type, calltypes, _ = type_inference_stage(
|
|
105
|
+
typingctx, targetctx, test_ir, args, None
|
|
106
|
+
)
|
|
107
|
+
_ = type_annotations.TypeAnnotation(
|
|
108
|
+
func_ir=test_ir,
|
|
109
|
+
typemap=typemap,
|
|
110
|
+
calltypes=calltypes,
|
|
111
|
+
lifted=(),
|
|
112
|
+
lifted_from=None,
|
|
113
|
+
args=args,
|
|
114
|
+
return_type=return_type,
|
|
115
|
+
html_output=config.HTML,
|
|
116
|
+
)
|
|
117
|
+
in_cps, out_cps = copy_propagate(test_ir.blocks, typemap)
|
|
118
|
+
_ = apply_copy_propagate(
|
|
119
|
+
test_ir.blocks,
|
|
120
|
+
in_cps,
|
|
121
|
+
get_name_var_table(test_ir.blocks),
|
|
122
|
+
typemap,
|
|
123
|
+
calltypes,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
self.assertTrue(_find_assign(test_ir, "x"))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
if __name__ == "__main__":
|
|
130
|
+
unittest.main()
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
|
|
3
6
|
from numba import cuda, vectorize, guvectorize
|
|
4
|
-
from numba.np.numpy_support import from_dtype
|
|
7
|
+
from numba.cuda.np.numpy_support import from_dtype
|
|
5
8
|
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
6
9
|
import unittest
|
|
7
10
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
|
|
3
|
-
from numba.core.utils import PYVERSION
|
|
6
|
+
from numba.cuda.utils import PYVERSION
|
|
4
7
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
5
8
|
from numba.cuda.tests.support import (
|
|
6
9
|
override_config,
|
|
@@ -1,11 +1,22 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from collections import namedtuple
|
|
1
5
|
from numba.cuda.tests.support import override_config, captured_stdout
|
|
2
6
|
from numba.cuda.testing import skip_on_cudasim
|
|
3
7
|
from numba import cuda
|
|
4
8
|
from numba.core import types
|
|
5
9
|
from numba.cuda.testing import CUDATestCase
|
|
10
|
+
from textwrap import dedent
|
|
11
|
+
import math
|
|
6
12
|
import itertools
|
|
7
13
|
import re
|
|
8
14
|
import unittest
|
|
15
|
+
import warnings
|
|
16
|
+
from numba.core.errors import NumbaDebugInfoWarning
|
|
17
|
+
from numba.cuda.tests.support import ignore_internal_warnings
|
|
18
|
+
import numpy as np
|
|
19
|
+
import inspect
|
|
9
20
|
|
|
10
21
|
|
|
11
22
|
@skip_on_cudasim("Simulator does not produce debug dumps")
|
|
@@ -26,7 +37,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
26
37
|
assertfn(match, msg=asm)
|
|
27
38
|
|
|
28
39
|
def test_no_debuginfo_in_asm(self):
|
|
29
|
-
@cuda.jit(debug=False)
|
|
40
|
+
@cuda.jit(debug=False, opt=False)
|
|
30
41
|
def foo(x):
|
|
31
42
|
x[0] = 1
|
|
32
43
|
|
|
@@ -403,8 +414,26 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
403
414
|
match = re.compile(pat6).search(llvm_ir)
|
|
404
415
|
self.assertIsNotNone(match, msg=llvm_ir)
|
|
405
416
|
|
|
417
|
+
def test_union_debug(self):
|
|
418
|
+
@cuda.jit("void(u8, int64[::1])", debug=True, opt=False)
|
|
419
|
+
def a_union_use_case(arg, results):
|
|
420
|
+
foo = 1
|
|
421
|
+
foo = arg
|
|
422
|
+
if foo < 1:
|
|
423
|
+
foo = 2
|
|
424
|
+
return
|
|
425
|
+
bar = foo == 0
|
|
426
|
+
results[0] = 1 if not bar else 0
|
|
427
|
+
|
|
428
|
+
with captured_stdout() as out:
|
|
429
|
+
results = cuda.to_device(np.zeros(16, dtype=np.int64))
|
|
430
|
+
a_union_use_case[1, 1](100, results)
|
|
431
|
+
print(results.copy_to_host())
|
|
432
|
+
expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
|
|
433
|
+
self.assertIn(expected, out.getvalue())
|
|
434
|
+
|
|
406
435
|
def test_DW_LANG(self):
|
|
407
|
-
@cuda.jit(debug=True)
|
|
436
|
+
@cuda.jit(debug=True, opt=False)
|
|
408
437
|
def foo():
|
|
409
438
|
"""
|
|
410
439
|
CHECK: distinct !DICompileUnit
|
|
@@ -443,7 +472,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
443
472
|
"""
|
|
444
473
|
sig = (types.float64,)
|
|
445
474
|
|
|
446
|
-
@cuda.jit(sig, debug=True)
|
|
475
|
+
@cuda.jit(sig, debug=True, opt=False)
|
|
447
476
|
def foo(a):
|
|
448
477
|
"""
|
|
449
478
|
CHECK-LABEL: define void @{{.+}}foo
|
|
@@ -501,6 +530,288 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
501
530
|
ir = foo.inspect_llvm()[sig]
|
|
502
531
|
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
503
532
|
|
|
533
|
+
def test_missing_source(self):
|
|
534
|
+
strsrc = """
|
|
535
|
+
def foo():
|
|
536
|
+
pass
|
|
537
|
+
"""
|
|
538
|
+
l = dict()
|
|
539
|
+
exec(dedent(strsrc), {}, l)
|
|
540
|
+
foo = cuda.jit(debug=True, opt=False)(l["foo"])
|
|
541
|
+
|
|
542
|
+
with warnings.catch_warnings(record=True) as w:
|
|
543
|
+
warnings.simplefilter("always", NumbaDebugInfoWarning)
|
|
544
|
+
ignore_internal_warnings()
|
|
545
|
+
foo[1, 1]()
|
|
546
|
+
|
|
547
|
+
self.assertEqual(len(w), 1)
|
|
548
|
+
found = w[0]
|
|
549
|
+
self.assertEqual(found.category, NumbaDebugInfoWarning)
|
|
550
|
+
msg = str(found.message)
|
|
551
|
+
# make sure the warning contains the right message
|
|
552
|
+
self.assertIn("Could not find source for function", msg)
|
|
553
|
+
# and refers to the offending function
|
|
554
|
+
self.assertIn(str(foo.py_func), msg)
|
|
555
|
+
|
|
556
|
+
def test_no_if_op_bools_declared(self):
|
|
557
|
+
@cuda.jit(
|
|
558
|
+
"int64(boolean, boolean)",
|
|
559
|
+
debug=True,
|
|
560
|
+
opt=False,
|
|
561
|
+
_dbg_optnone=True,
|
|
562
|
+
device=True,
|
|
563
|
+
)
|
|
564
|
+
def choice(cond1, cond2):
|
|
565
|
+
"""
|
|
566
|
+
CHECK: define void @{{.+}}choices
|
|
567
|
+
"""
|
|
568
|
+
if cond1 and cond2:
|
|
569
|
+
return 1
|
|
570
|
+
else:
|
|
571
|
+
return 2
|
|
572
|
+
|
|
573
|
+
ir_content = choice.inspect_llvm()[choice.signatures[0]]
|
|
574
|
+
# We should not declare variables used as the condition in if ops.
|
|
575
|
+
# See Numba PR #9888: https://github.com/numba/numba/pull/9888
|
|
576
|
+
|
|
577
|
+
for line in ir_content.splitlines():
|
|
578
|
+
if "llvm.dbg.declare" in line:
|
|
579
|
+
self.assertNotIn("bool", line)
|
|
580
|
+
|
|
581
|
+
def test_llvm_inliner_flag_conflict(self):
|
|
582
|
+
# bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is
|
|
583
|
+
# set functions are not marked as 'alwaysinline' and this results in a
|
|
584
|
+
# conflict. baz will not be marked as 'alwaysinline' as a result of
|
|
585
|
+
# DEBUGINFO_DEFAULT
|
|
586
|
+
|
|
587
|
+
@cuda.jit(forceinline=True)
|
|
588
|
+
def bar(x):
|
|
589
|
+
return math.sin(x)
|
|
590
|
+
|
|
591
|
+
@cuda.jit(forceinline=False)
|
|
592
|
+
def baz(x):
|
|
593
|
+
return math.cos(x)
|
|
594
|
+
|
|
595
|
+
@cuda.jit(opt=True)
|
|
596
|
+
def foo(x, y):
|
|
597
|
+
"""
|
|
598
|
+
CHECK-LABEL: define void @{{.+}}foo
|
|
599
|
+
CHECK: call i32 @"[[BAR:.+]]"(
|
|
600
|
+
CHECK: call i32 @"[[BAZ:.+]]"(
|
|
601
|
+
|
|
602
|
+
CHECK-DAG: declare i32 @"[[BAR]]"({{.+}}alwaysinline
|
|
603
|
+
CHECK-DAG: declare i32 @"[[BAZ]]"(
|
|
604
|
+
CHECK-DAG: define linkonce_odr i32 @"[[BAR]]"({{.+}}alwaysinline
|
|
605
|
+
CHECK-DAG: define linkonce_odr i32 @"[[BAZ]]"(
|
|
606
|
+
"""
|
|
607
|
+
a = bar(y)
|
|
608
|
+
b = baz(y)
|
|
609
|
+
x[0] = a + b
|
|
610
|
+
|
|
611
|
+
# check it compiles
|
|
612
|
+
with override_config("DEBUGINFO_DEFAULT", 1):
|
|
613
|
+
result = cuda.device_array(1, dtype=np.float32)
|
|
614
|
+
foo[1, 1](result, np.pi)
|
|
615
|
+
result.copy_to_host()
|
|
616
|
+
|
|
617
|
+
result_host = math.sin(np.pi) + math.cos(np.pi)
|
|
618
|
+
self.assertPreciseEqual(result[0], result_host)
|
|
619
|
+
|
|
620
|
+
ir_content = foo.inspect_llvm()[foo.signatures[0]]
|
|
621
|
+
self.assertFileCheckMatches(ir_content, foo.__doc__)
|
|
622
|
+
|
|
623
|
+
# Check that the device functions call the appropriate device
|
|
624
|
+
# math functions and have the correct attributes.
|
|
625
|
+
self.assertFileCheckMatches(
|
|
626
|
+
ir_content,
|
|
627
|
+
"""
|
|
628
|
+
CHECK: define linkonce_odr i32 @{{.+}}bar
|
|
629
|
+
CHECK-SAME: alwaysinline
|
|
630
|
+
CHECK-NEXT: {
|
|
631
|
+
CHECK-NEXT: {{.*}}:
|
|
632
|
+
CHECK-NEXT: br label {{.*}}
|
|
633
|
+
CHECK-NEXT: {{.*}}:
|
|
634
|
+
CHECK-NEXT: call double @"__nv_sin"
|
|
635
|
+
CHECK-NEXT: store double {{.*}}, double* {{.*}}
|
|
636
|
+
CHECK-NEXT: ret i32 0
|
|
637
|
+
CHECK-NEXT: }
|
|
638
|
+
""",
|
|
639
|
+
)
|
|
640
|
+
|
|
641
|
+
self.assertFileCheckMatches(
|
|
642
|
+
ir_content,
|
|
643
|
+
"""
|
|
644
|
+
CHECK: define linkonce_odr i32 @{{.+}}baz
|
|
645
|
+
CHECK-NOT: alwaysinline
|
|
646
|
+
CHECK-NEXT: {
|
|
647
|
+
CHECK-NEXT: {{.*}}:
|
|
648
|
+
CHECK-NEXT: br label {{.*}}
|
|
649
|
+
CHECK-NEXT: {{.*}}:
|
|
650
|
+
CHECK-NEXT: call double @"__nv_cos"
|
|
651
|
+
CHECK-NEXT: store double {{.*}}, double* {{.*}}
|
|
652
|
+
CHECK-NEXT: ret i32 0
|
|
653
|
+
CHECK-NEXT: }
|
|
654
|
+
""",
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
def test_DILocation_versioned_variables(self):
|
|
658
|
+
"""Tests that DILocation information for versions of variables matches
|
|
659
|
+
up to their definition site."""
|
|
660
|
+
|
|
661
|
+
@cuda.jit(debug=True, opt=False)
|
|
662
|
+
def foo(dest, n):
|
|
663
|
+
"""
|
|
664
|
+
CHECK: define void @{{.+}}foo
|
|
665
|
+
CHECK: store i64 5, i64* %"c{{.+}} !dbg ![[STORE5:.+]]
|
|
666
|
+
CHECK: store i64 1, i64* %"c{{.+}} !dbg ![[STORE1:.+]]
|
|
667
|
+
CHECK: [[STORE5]] = !DILocation(
|
|
668
|
+
CHECK: [[STORE1]] = !DILocation(
|
|
669
|
+
"""
|
|
670
|
+
if n:
|
|
671
|
+
c = 5
|
|
672
|
+
else:
|
|
673
|
+
c = 1
|
|
674
|
+
dest[0] = c
|
|
675
|
+
|
|
676
|
+
foo_source_lines, foo_source_lineno = inspect.getsourcelines(
|
|
677
|
+
foo.py_func
|
|
678
|
+
)
|
|
679
|
+
|
|
680
|
+
result = cuda.device_array(1, dtype=np.int32)
|
|
681
|
+
foo[1, 1](result, 1)
|
|
682
|
+
result.copy_to_host()
|
|
683
|
+
self.assertEqual(result[0], 5)
|
|
684
|
+
|
|
685
|
+
ir_content = foo.inspect_llvm()[foo.signatures[0]]
|
|
686
|
+
self.assertFileCheckMatches(ir_content, foo.__doc__)
|
|
687
|
+
|
|
688
|
+
# Collect lines pertaining to the function `foo` and debuginfo
|
|
689
|
+
# metadata
|
|
690
|
+
lines = ir_content.splitlines()
|
|
691
|
+
debuginfo_equals = re.compile(r"!(\d+) = ")
|
|
692
|
+
debug_info_lines = list(
|
|
693
|
+
filter(lambda x: debuginfo_equals.search(x), lines)
|
|
694
|
+
)
|
|
695
|
+
|
|
696
|
+
function_start_regex = re.compile(r"define void @.+foo")
|
|
697
|
+
function_start_lines = list(
|
|
698
|
+
filter(
|
|
699
|
+
lambda x: function_start_regex.search(x[1]), enumerate(lines)
|
|
700
|
+
)
|
|
701
|
+
)
|
|
702
|
+
function_end_lines = list(
|
|
703
|
+
filter(lambda x: x[1] == "}", enumerate(lines))
|
|
704
|
+
)
|
|
705
|
+
foo_ir_lines = lines[
|
|
706
|
+
function_start_lines[0][0] : function_end_lines[0][0]
|
|
707
|
+
]
|
|
708
|
+
|
|
709
|
+
# Check the if condition's debuginfo
|
|
710
|
+
cond_branch = list(filter(lambda x: "br i1" in x, foo_ir_lines))
|
|
711
|
+
self.assertEqual(len(cond_branch), 1)
|
|
712
|
+
self.assertIn("!dbg", cond_branch[0])
|
|
713
|
+
cond_branch_dbginfo_node = cond_branch[0].split("!dbg")[1].strip()
|
|
714
|
+
cond_branch_dbginfos = list(
|
|
715
|
+
filter(
|
|
716
|
+
lambda x: cond_branch_dbginfo_node + " = " in x,
|
|
717
|
+
debug_info_lines,
|
|
718
|
+
)
|
|
719
|
+
)
|
|
720
|
+
self.assertEqual(len(cond_branch_dbginfos), 1)
|
|
721
|
+
cond_branch_dbginfo = cond_branch_dbginfos[0]
|
|
722
|
+
|
|
723
|
+
# Check debuginfo for the store instructions
|
|
724
|
+
store_1_lines = list(filter(lambda x: "store i64 1" in x, foo_ir_lines))
|
|
725
|
+
store_5_lines = list(filter(lambda x: "store i64 5" in x, foo_ir_lines))
|
|
726
|
+
|
|
727
|
+
self.assertEqual(len(store_1_lines), 2)
|
|
728
|
+
self.assertEqual(len(store_5_lines), 2)
|
|
729
|
+
|
|
730
|
+
store_1_dbginfo_set = set(
|
|
731
|
+
map(lambda x: x.split("!dbg")[1].strip(), store_1_lines)
|
|
732
|
+
)
|
|
733
|
+
store_5_dbginfo_set = set(
|
|
734
|
+
map(lambda x: x.split("!dbg")[1].strip(), store_5_lines)
|
|
735
|
+
)
|
|
736
|
+
self.assertEqual(len(store_1_dbginfo_set), 1)
|
|
737
|
+
self.assertEqual(len(store_5_dbginfo_set), 1)
|
|
738
|
+
store_1_dbginfo_node = store_1_dbginfo_set.pop()
|
|
739
|
+
store_5_dbginfo_node = store_5_dbginfo_set.pop()
|
|
740
|
+
store_1_dbginfos = list(
|
|
741
|
+
filter(
|
|
742
|
+
lambda x: store_1_dbginfo_node + " = " in x, debug_info_lines
|
|
743
|
+
)
|
|
744
|
+
)
|
|
745
|
+
store_5_dbginfos = list(
|
|
746
|
+
filter(
|
|
747
|
+
lambda x: store_5_dbginfo_node + " = " in x, debug_info_lines
|
|
748
|
+
)
|
|
749
|
+
)
|
|
750
|
+
self.assertEqual(len(store_1_dbginfos), 1)
|
|
751
|
+
self.assertEqual(len(store_5_dbginfos), 1)
|
|
752
|
+
store_1_dbginfo = store_1_dbginfos[0]
|
|
753
|
+
store_5_dbginfo = store_5_dbginfos[0]
|
|
754
|
+
|
|
755
|
+
# Ensure the line numbers match what we expect based on the Python source
|
|
756
|
+
line_number_regex = re.compile(r"line: (\d+)")
|
|
757
|
+
LineNumbers = namedtuple(
|
|
758
|
+
"LineNumbers", ["cond_branch", "store_5", "store_1"]
|
|
759
|
+
)
|
|
760
|
+
line_number_matches = LineNumbers(
|
|
761
|
+
*map(
|
|
762
|
+
lambda x: line_number_regex.search(x),
|
|
763
|
+
[cond_branch_dbginfo, store_5_dbginfo, store_1_dbginfo],
|
|
764
|
+
)
|
|
765
|
+
)
|
|
766
|
+
self.assertTrue(
|
|
767
|
+
all(
|
|
768
|
+
map(
|
|
769
|
+
lambda x: x is not None,
|
|
770
|
+
line_number_matches,
|
|
771
|
+
)
|
|
772
|
+
)
|
|
773
|
+
)
|
|
774
|
+
line_numbers = LineNumbers(
|
|
775
|
+
*map(
|
|
776
|
+
lambda x: int(x.group(1)),
|
|
777
|
+
line_number_matches,
|
|
778
|
+
)
|
|
779
|
+
)
|
|
780
|
+
source_line_numbers = LineNumbers(
|
|
781
|
+
*map(
|
|
782
|
+
lambda x: x[0] + foo_source_lineno,
|
|
783
|
+
filter(
|
|
784
|
+
lambda x: "c = " in x[1] or "if n:" in x[1],
|
|
785
|
+
enumerate(foo_source_lines),
|
|
786
|
+
),
|
|
787
|
+
)
|
|
788
|
+
)
|
|
789
|
+
self.assertEqual(line_numbers, source_line_numbers)
|
|
790
|
+
|
|
791
|
+
def test_debuginfo_asm(self):
|
|
792
|
+
def foo():
|
|
793
|
+
pass
|
|
794
|
+
|
|
795
|
+
foo_debug = cuda.jit(debug=True, opt=False)(foo)
|
|
796
|
+
foo_debug[1, 1]()
|
|
797
|
+
asm = foo_debug.inspect_asm()[foo_debug.signatures[0]]
|
|
798
|
+
self.assertFileCheckMatches(
|
|
799
|
+
asm,
|
|
800
|
+
"""
|
|
801
|
+
CHECK: .section{{.+}}.debug
|
|
802
|
+
""",
|
|
803
|
+
)
|
|
804
|
+
|
|
805
|
+
foo_nodebug = cuda.jit(debug=False)(foo)
|
|
806
|
+
foo_nodebug[1, 1]()
|
|
807
|
+
asm = foo_nodebug.inspect_asm()[foo_nodebug.signatures[0]]
|
|
808
|
+
self.assertFileCheckMatches(
|
|
809
|
+
asm,
|
|
810
|
+
"""
|
|
811
|
+
CHECK-NOT: .section{{.+}}.debug
|
|
812
|
+
""",
|
|
813
|
+
)
|
|
814
|
+
|
|
504
815
|
|
|
505
816
|
if __name__ == "__main__":
|
|
506
817
|
unittest.main()
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numba.cuda as cuda
|
|
2
5
|
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
3
6
|
import llvmlite
|
|
@@ -465,7 +468,7 @@ class TestCudaDebugInfoTypes(CUDATestCase):
|
|
|
465
468
|
f"Test DITypes for {sanitize_name(numba_type.name)}"
|
|
466
469
|
):
|
|
467
470
|
|
|
468
|
-
@cuda.jit((numba_type,), debug=True)
|
|
471
|
+
@cuda.jit((numba_type,), debug=True, opt=False)
|
|
469
472
|
def foo(a):
|
|
470
473
|
pass
|
|
471
474
|
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba.cuda.cudadrv.driver import CudaAPIError
|
|
2
5
|
import numpy as np
|
|
3
6
|
import threading
|
|
4
7
|
|
|
5
8
|
from numba import (
|
|
6
9
|
boolean,
|
|
7
|
-
config,
|
|
8
10
|
cuda,
|
|
9
11
|
float32,
|
|
10
12
|
float64,
|
|
@@ -14,6 +16,7 @@ from numba import (
|
|
|
14
16
|
uint32,
|
|
15
17
|
void,
|
|
16
18
|
)
|
|
19
|
+
from numba.cuda import config
|
|
17
20
|
from numba.core.errors import TypingError
|
|
18
21
|
from numba.cuda.testing import (
|
|
19
22
|
cc_X_or_above,
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba import cuda
|
|
2
5
|
from numba.core.errors import TypingError
|
|
3
6
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
4
|
-
from numba import config
|
|
7
|
+
from numba.cuda import config
|
|
5
8
|
|
|
6
9
|
|
|
7
10
|
def noop(x):
|