numba-cuda 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/PKG-INFO +1 -1
- numba_cuda-0.6.0/numba_cuda/VERSION +1 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/compiler.py +78 -2
- numba_cuda-0.6.0/numba_cuda/numba/cuda/debuginfo.py +44 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/dispatcher.py +58 -11
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/target.py +4 -134
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +81 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +125 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +4 -1
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/PKG-INFO +1 -1
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/SOURCES.txt +1 -0
- numba_cuda-0.5.0/numba_cuda/VERSION +0 -1
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/LICENSE +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/README.md +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/codegen.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadecl.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/decorators.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/device_init.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/stubs.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/utils.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/pyproject.toml +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/setup.cfg +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/setup.py +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.5.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.6.0
|
@@ -1,14 +1,17 @@
|
|
1
1
|
from llvmlite import ir
|
2
2
|
from numba.core.typing.templates import ConcreteTemplate
|
3
|
+
from numba.core import ir as numba_ir
|
3
4
|
from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
|
4
5
|
sigutils, utils)
|
5
6
|
from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
|
6
7
|
DefaultPassBuilder, Flags, Option,
|
7
8
|
CompileResult)
|
8
9
|
from numba.core.compiler_lock import global_compiler_lock
|
9
|
-
from numba.core.compiler_machinery import (LoweringPass,
|
10
|
+
from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
|
10
11
|
PassManager, register_pass)
|
12
|
+
from numba.core.interpreter import Interpreter
|
11
13
|
from numba.core.errors import NumbaInvalidConfigWarning
|
14
|
+
from numba.core.untyped_passes import TranslateByteCode
|
12
15
|
from numba.core.typed_passes import (IRLegalization, NativeLowering,
|
13
16
|
AnnotateTypes)
|
14
17
|
from warnings import warn
|
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
|
|
143
146
|
return True
|
144
147
|
|
145
148
|
|
149
|
+
class CUDABytecodeInterpreter(Interpreter):
    """Bytecode interpreter with a CUDA-specific conditional-jump handler.

    ``_op_JUMP_IF`` follows the superclass implementation, except that the
    temporary holding the ``bool`` global is named ``"$bool<N>"`` rather
    than ``"bool<N>"`` - see Numba PR #9888:
    https://github.com/numba/numba/pull/9888

    This class can be removed once that PR is available in an upstream
    Numba release.
    """

    def _op_JUMP_IF(self, inst, pred, iftrue):
        """Emit ``bool(pred)`` followed by a branch on its result.

        ``inst`` supplies the jump target, the fall-through (``inst.next``)
        and the offset used to build the temporary names. ``pred`` is the
        name of the variable being tested. ``iftrue`` selects whether the
        jump target is taken when the predicate is true (``True``) or when
        it is false (``False``).
        """
        # Resolve both successors up front, then pick them by polarity.
        jump_target = inst.get_jump_target()
        fall_through = inst.next
        successor_for = {True: jump_target, False: fall_through}
        on_true = successor_for[iftrue]
        on_false = successor_for[not iftrue]

        # Store the ``bool`` builtin in a "$"-prefixed temporary.
        bool_var_name = "$bool%s" % (inst.offset)
        bool_global = numba_ir.Global("bool", bool, loc=self.loc)
        self.store(value=bool_global, name=bool_var_name)

        # Build the call expression ``bool(pred)``.
        bool_callee = self.get(bool_var_name)
        call_args = (self.get(pred),)
        bool_call = numba_ir.Expr.call(bool_callee, call_args, (),
                                       loc=self.loc)

        # Store the call result and branch on it.
        predicate_name = "$%spred" % (inst.offset)
        condition = self.store(value=bool_call, name=predicate_name)
        branch = numba_ir.Branch(cond=condition, truebr=on_true,
                                 falsebr=on_false, loc=self.loc)
        self.current_block.append(branch)
|
176
|
+
|
177
|
+
|
178
|
+
@register_pass(mutates_CFG=True, analysis_only=False)
class CUDATranslateBytecode(FunctionPass):
    """Compiler pass that builds the function IR from bytecode using
    ``CUDABytecodeInterpreter``."""

    _name = "cuda_translate_bytecode"

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """Interpret ``state['bc']`` for ``state['func_id']`` and store the
        resulting IR in ``state['func_ir']``.

        Always returns ``True``.
        """
        # Read both inputs before constructing the interpreter.
        function_id, bytecode = state['func_id'], state['bc']
        interpreter = CUDABytecodeInterpreter(function_id)
        state['func_ir'] = interpreter.interpret(bytecode)
        return True
|
192
|
+
|
193
|
+
|
146
194
|
class CUDACompiler(CompilerBase):
|
147
195
|
def define_pipelines(self):
|
148
196
|
dpb = DefaultPassBuilder
|
149
197
|
pm = PassManager('cuda')
|
150
198
|
|
151
199
|
untyped_passes = dpb.define_untyped_pipeline(self.state)
|
152
|
-
|
200
|
+
|
201
|
+
# Rather than replicating the whole untyped passes definition in
|
202
|
+
# numba-cuda, it seems cleaner to take the pass list and replace the
|
203
|
+
# TranslateBytecode pass with our own.
|
204
|
+
|
205
|
+
def replace_translate_pass(implementation, description):
|
206
|
+
if implementation is TranslateByteCode:
|
207
|
+
return (CUDATranslateBytecode, description)
|
208
|
+
else:
|
209
|
+
return (implementation, description)
|
210
|
+
|
211
|
+
cuda_untyped_passes = [
|
212
|
+
replace_translate_pass(implementation, description)
|
213
|
+
for implementation, description in untyped_passes.passes
|
214
|
+
]
|
215
|
+
|
216
|
+
pm.passes.extend(cuda_untyped_passes)
|
153
217
|
|
154
218
|
typed_passes = dpb.define_typed_pipeline(self.state)
|
155
219
|
pm.passes.extend(typed_passes.passes)
|
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
|
|
352
416
|
kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
|
353
417
|
kernel.args = kernel.args[1:]
|
354
418
|
|
419
|
+
# If debug metadata is present, remove the return value from it
|
420
|
+
|
421
|
+
if kernel_metadata := getattr(kernel, 'metadata', None):
|
422
|
+
if dbg_metadata := kernel_metadata.get('dbg', None):
|
423
|
+
for name, value in dbg_metadata.operands:
|
424
|
+
if name == "type":
|
425
|
+
type_metadata = value
|
426
|
+
for tm_name, tm_value in type_metadata.operands:
|
427
|
+
if tm_name == 'types':
|
428
|
+
types = tm_value
|
429
|
+
types.operands = types.operands[1:]
|
430
|
+
|
355
431
|
# Mark as a kernel for NVVM
|
356
432
|
|
357
433
|
nvvm.set_cuda_kernel(kernel)
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from llvmlite import ir
|
2
|
+
from numba.core import types
|
3
|
+
from numba.core.debuginfo import DIBuilder
|
4
|
+
from numba.cuda.types import GridGroup
|
5
|
+
|
6
|
+
_BYTE_SIZE = 8
|
7
|
+
|
8
|
+
|
9
|
+
class CUDADIBuilder(DIBuilder):
    """Debug-info builder that overrides how boolean and ``GridGroup``
    integer variables are described; every other case is delegated to the
    upstream Numba ``DIBuilder``."""

    def _var_type(self, lltype, size, datamodel=None):
        """Return the debug-info type node for a variable.

        A ``DIBasicType`` is emitted here for LLVM integer types when:

        * no datamodel is given and ``size`` is 1 (treated as a boolean),
        * the datamodel's front-end type is ``types.Boolean``, or
        * the datamodel's front-end type is ``GridGroup``.

        All other cases use the superclass implementation.
        """
        # DWARF encoding token; None means "defer to upstream Numba".
        encoding = None

        if isinstance(lltype, ir.IntType):
            if datamodel is not None:
                fe_type = datamodel.fe_type
                name = str(fe_type)
                if isinstance(fe_type, types.Boolean):
                    # Boolean type workaround until upstream Numba is fixed
                    encoding = "DW_ATE_boolean"
                elif isinstance(fe_type, GridGroup):
                    # GridGroup type should use numba.cuda implementation
                    encoding = "DW_ATE_unsigned"
            elif size == 1:
                # No datamodel: a size-1 integer is described as a boolean.
                name = str(lltype)
                encoding = "DW_ATE_boolean"

        if encoding is None:
            # For other cases, use upstream Numba implementation
            return super()._var_type(lltype, size, datamodel=datamodel)

        return self.module.add_debug_info('DIBasicType', {
            'name': name,
            'size': _BYTE_SIZE * size,
            'encoding': ir.DIToken(encoding),
        })
|
@@ -4,8 +4,9 @@ import re
|
|
4
4
|
import sys
|
5
5
|
import ctypes
|
6
6
|
import functools
|
7
|
+
from collections import defaultdict
|
7
8
|
|
8
|
-
from numba.core import config, serialize, sigutils, types, typing, utils
|
9
|
+
from numba.core import config, ir, serialize, sigutils, types, typing, utils
|
9
10
|
from numba.core.caching import Cache, CacheImpl
|
10
11
|
from numba.core.compiler_lock import global_compiler_lock
|
11
12
|
from numba.core.dispatcher import Dispatcher
|
@@ -42,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
|
|
42
43
|
reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
|
43
44
|
|
44
45
|
|
46
|
+
def get_cres_link_objects(cres):
    """Collect every linkable code object a compile result depends on.

    The returned set is the union of the ``link`` entries of the
    ``ExternFunction`` declarations used directly by ``cres`` and,
    recursively, those required by each ``CUDADispatcher`` overload that
    ``cres`` calls.
    """
    typemap = cres.fndesc.typemap

    # Typemap entries whose type is a CUDA dispatcher, i.e. potential callees
    dispatcher_vars = [
        (var_name, ty) for var_name, ty in typemap.items()
        if isinstance(ty, cuda_types.CUDADispatcher)
    ]

    # Group the signatures of all call expressions by the callee's SSA name
    sigs_by_callee = defaultdict(list)
    for expr, sig in cres.fndesc.calltypes.items():
        if isinstance(expr, ir.Expr) and expr.op == 'call':
            sigs_by_callee[expr.func.name].append(sig)

    required = set()

    # Recurse into the overload selected by each call to a dispatcher
    for var_name, ty in dispatcher_vars:
        for sig in sigs_by_callee.get(var_name, ()):
            callee_cres = ty.dispatcher.overloads[sig.args]
            required |= get_cres_link_objects(callee_cres)

    # Only ExternFunction declarations matter from here on: these are the
    # calls made directly in this function to them
    for ty in typemap.values():
        if not isinstance(ty, Function):
            continue
        extern = ty.typing_key
        if isinstance(extern, ExternFunction):
            required.update(extern.link)

    return required
|
93
|
+
|
94
|
+
|
45
95
|
class _Kernel(serialize.ReduceMixin):
|
46
96
|
'''
|
47
97
|
CUDA Kernel specialized for a given set of argument types. When called, this
|
@@ -159,15 +209,8 @@ class _Kernel(serialize.ReduceMixin):
|
|
159
209
|
|
160
210
|
self.maybe_link_nrt(link, tgt_ctx, asm)
|
161
211
|
|
162
|
-
for
|
163
|
-
|
164
|
-
continue
|
165
|
-
|
166
|
-
if not isinstance(v.typing_key, ExternFunction):
|
167
|
-
continue
|
168
|
-
|
169
|
-
for obj in v.typing_key.link:
|
170
|
-
lib.add_linking_file(obj)
|
212
|
+
for obj in get_cres_link_objects(cres):
|
213
|
+
lib.add_linking_file(obj)
|
171
214
|
|
172
215
|
for filepath in link:
|
173
216
|
lib.add_linking_file(filepath)
|
@@ -267,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
|
|
267
310
|
"""
|
268
311
|
cufunc = self._codelibrary.get_cufunc()
|
269
312
|
|
270
|
-
if
|
313
|
+
if (
|
314
|
+
hasattr(self, "target_context")
|
315
|
+
and self.target_context.enable_nrt
|
316
|
+
and config.CUDA_NRT_STATS
|
317
|
+
):
|
271
318
|
rtsys.ensure_initialized()
|
272
319
|
rtsys.set_memsys_to_module(cufunc.module)
|
273
320
|
# We don't know which stream the kernel will be launched on, so
|
@@ -3,8 +3,7 @@ from functools import cached_property
|
|
3
3
|
import llvmlite.binding as ll
|
4
4
|
from llvmlite import ir
|
5
5
|
|
6
|
-
from numba.core import
|
7
|
-
typing, utils)
|
6
|
+
from numba.core import cgutils, config, itanium_mangler, types, typing
|
8
7
|
from numba.core.dispatcher import Dispatcher
|
9
8
|
from numba.core.base import BaseContext
|
10
9
|
from numba.core.callconv import BaseCallConv, MinimalCallConv
|
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
|
|
12
11
|
from numba.core import datamodel
|
13
12
|
|
14
13
|
from .cudadrv import nvvm
|
15
|
-
from numba.cuda import codegen,
|
14
|
+
from numba.cuda import codegen, ufuncs
|
15
|
+
from numba.cuda.debuginfo import CUDADIBuilder
|
16
16
|
from numba.cuda.models import cuda_data_manager
|
17
17
|
|
18
18
|
# -----------------------------------------------------------------------------
|
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):
|
|
80
80
|
|
81
81
|
@property
def DIBuilder(self):
    """Debug-info builder class used by this target context."""
    return CUDADIBuilder
|
84
84
|
|
85
85
|
@property
|
86
86
|
def enable_boundscheck(self):
|
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
|
|
150
150
|
return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
|
151
151
|
uid=uid)
|
152
152
|
|
153
|
-
def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
|
154
|
-
nvvm_options, filename, linenum,
|
155
|
-
max_registers=None, lto=False):
|
156
|
-
"""
|
157
|
-
Adapt a code library ``codelib`` with the numba compiled CUDA kernel
|
158
|
-
with name ``fname`` and arguments ``argtypes`` for NVVM.
|
159
|
-
A new library is created with a wrapper function that can be used as
|
160
|
-
the kernel entry point for the given kernel.
|
161
|
-
|
162
|
-
Returns the new code library and the wrapper function.
|
163
|
-
|
164
|
-
Parameters:
|
165
|
-
|
166
|
-
codelib: The CodeLibrary containing the device function to wrap
|
167
|
-
in a kernel call.
|
168
|
-
fndesc: The FunctionDescriptor of the source function.
|
169
|
-
debug: Whether to compile with debug.
|
170
|
-
lineinfo: Whether to emit line info.
|
171
|
-
nvvm_options: Dict of NVVM options used when compiling the new library.
|
172
|
-
filename: The source filename that the function is contained in.
|
173
|
-
linenum: The source line that the function is on.
|
174
|
-
max_registers: The max_registers argument for the code library.
|
175
|
-
"""
|
176
|
-
kernel_name = itanium_mangler.prepend_namespace(
|
177
|
-
fndesc.llvm_func_name, ns='cudapy',
|
178
|
-
)
|
179
|
-
library = self.codegen().create_library(f'{codelib.name}_kernel_',
|
180
|
-
entry_name=kernel_name,
|
181
|
-
nvvm_options=nvvm_options,
|
182
|
-
max_registers=max_registers,
|
183
|
-
lto=lto
|
184
|
-
)
|
185
|
-
library.add_linking_library(codelib)
|
186
|
-
wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
|
187
|
-
debug, lineinfo, filename,
|
188
|
-
linenum)
|
189
|
-
return library, wrapper
|
190
|
-
|
191
|
-
def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
|
192
|
-
lineinfo, filename, linenum):
|
193
|
-
"""
|
194
|
-
Generate the kernel wrapper in the given ``library``.
|
195
|
-
The function being wrapped is described by ``fndesc``.
|
196
|
-
The wrapper function is returned.
|
197
|
-
"""
|
198
|
-
|
199
|
-
argtypes = fndesc.argtypes
|
200
|
-
arginfo = self.get_arg_packer(argtypes)
|
201
|
-
argtys = list(arginfo.argument_types)
|
202
|
-
wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
|
203
|
-
wrapper_module = self.create_module("cuda.kernel.wrapper")
|
204
|
-
fnty = ir.FunctionType(ir.IntType(32),
|
205
|
-
[self.call_conv.get_return_type(types.pyobject)]
|
206
|
-
+ argtys)
|
207
|
-
func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
|
208
|
-
|
209
|
-
prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
|
210
|
-
wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
|
211
|
-
builder = ir.IRBuilder(wrapfn.append_basic_block(''))
|
212
|
-
|
213
|
-
if debug or lineinfo:
|
214
|
-
directives_only = lineinfo and not debug
|
215
|
-
debuginfo = self.DIBuilder(module=wrapper_module,
|
216
|
-
filepath=filename,
|
217
|
-
cgctx=self,
|
218
|
-
directives_only=directives_only)
|
219
|
-
debuginfo.mark_subprogram(
|
220
|
-
wrapfn, kernel_name, fndesc.args, argtypes, linenum,
|
221
|
-
)
|
222
|
-
debuginfo.mark_location(builder, linenum)
|
223
|
-
|
224
|
-
# Define error handling variable
|
225
|
-
def define_error_gv(postfix):
|
226
|
-
name = wrapfn.name + postfix
|
227
|
-
gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
|
228
|
-
name)
|
229
|
-
gv.initializer = ir.Constant(gv.type.pointee, None)
|
230
|
-
return gv
|
231
|
-
|
232
|
-
gv_exc = define_error_gv("__errcode__")
|
233
|
-
gv_tid = []
|
234
|
-
gv_ctaid = []
|
235
|
-
for i in 'xyz':
|
236
|
-
gv_tid.append(define_error_gv("__tid%s__" % i))
|
237
|
-
gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
|
238
|
-
|
239
|
-
callargs = arginfo.from_arguments(builder, wrapfn.args)
|
240
|
-
status, _ = self.call_conv.call_function(
|
241
|
-
builder, func, types.void, argtypes, callargs)
|
242
|
-
|
243
|
-
if debug:
|
244
|
-
# Check error status
|
245
|
-
with cgutils.if_likely(builder, status.is_ok):
|
246
|
-
builder.ret_void()
|
247
|
-
|
248
|
-
with builder.if_then(builder.not_(status.is_python_exc)):
|
249
|
-
# User exception raised
|
250
|
-
old = ir.Constant(gv_exc.type.pointee, None)
|
251
|
-
|
252
|
-
# Use atomic cmpxchg to prevent rewriting the error status
|
253
|
-
# Only the first error is recorded
|
254
|
-
|
255
|
-
xchg = builder.cmpxchg(gv_exc, old, status.code,
|
256
|
-
'monotonic', 'monotonic')
|
257
|
-
changed = builder.extract_value(xchg, 1)
|
258
|
-
|
259
|
-
# If the xchange is successful, save the thread ID.
|
260
|
-
sreg = nvvmutils.SRegBuilder(builder)
|
261
|
-
with builder.if_then(changed):
|
262
|
-
for dim, ptr, in zip("xyz", gv_tid):
|
263
|
-
val = sreg.tid(dim)
|
264
|
-
builder.store(val, ptr)
|
265
|
-
|
266
|
-
for dim, ptr, in zip("xyz", gv_ctaid):
|
267
|
-
val = sreg.ctaid(dim)
|
268
|
-
builder.store(val, ptr)
|
269
|
-
|
270
|
-
builder.ret_void()
|
271
|
-
|
272
|
-
nvvm.set_cuda_kernel(wrapfn)
|
273
|
-
library.add_ir_module(wrapper_module)
|
274
|
-
if debug or lineinfo:
|
275
|
-
debuginfo.finalize()
|
276
|
-
library.finalize()
|
277
|
-
|
278
|
-
if config.DUMP_LLVM:
|
279
|
-
utils.dump_llvm(fndesc, wrapper_module)
|
280
|
-
|
281
|
-
return library.get_function(wrapfn.name)
|
282
|
-
|
283
153
|
def make_constant_array(self, builder, aryty, arr):
|
284
154
|
"""
|
285
155
|
Unlike the parent version, this returns a pointer in the constant
|
@@ -72,6 +72,57 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
72
72
|
def f(x):
|
73
73
|
x[0] = 0
|
74
74
|
|
75
|
+
def test_issue_9888(self):
    """Compiler-created symbols must not be emitted as DILocalVariable.

    See Numba #9888: https://github.com/numba/numba/pull/9888
    """
    argtys = (types.boolean,)

    @cuda.jit(argtys, debug=True, opt=False)
    def f(cond):
        if cond:
            x = 1  # noqa: F841
        else:
            x = 0  # noqa: F841

    llvm_ir = f.inspect_llvm(argtys)

    # No variable whose name starts with "bool" may appear in the debug
    # metadata
    found = re.search(r'!DILocalVariable\(.*name:\s+\"bool', llvm_ir)
    self.assertIsNone(found, msg=llvm_ir)
|
92
|
+
|
93
|
+
def test_bool_type(self):
    """The result of a comparison is described as a DW_ATE_boolean type."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x == y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # The `type` field of the DILocalVariable for "z" names a metadata node
    var_match = re.search(
        r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)', llvm_ir
    )
    self.assertIsNotNone(var_match, msg=llvm_ir)
    type_id = var_match.group(1)

    # That node must be a DIBasicType with the DW_ATE_boolean encoding
    type_match = re.search(
        rf'!{type_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean', llvm_ir
    )
    self.assertIsNotNone(type_match, msg=llvm_ir)
|
112
|
+
|
113
|
+
def test_grid_group_type(self):
    """A grid group variable gets a 64-bit unsigned "GridGroup" type."""
    sig = (types.int32,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(x):
        grid = cuda.cg.this_grid()  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Look for the DIBasicType describing the grid group
    found = re.search(
        r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64',
        llvm_ir,
    )
    self.assertIsNotNone(found, msg=llvm_ir)
|
125
|
+
|
75
126
|
@unittest.skip("Wrappers no longer exist")
|
76
127
|
def test_wrapper_has_debuginfo(self):
|
77
128
|
sig = (types.int32[::1],)
|
@@ -217,6 +268,36 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
217
268
|
three_device_fns(kernel_debug=False, leaf_debug=True)
|
218
269
|
three_device_fns(kernel_debug=False, leaf_debug=False)
|
219
270
|
|
271
|
+
def test_kernel_args_types(self):
    """Both int32 kernel arguments appear as signed "int32" basic types."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x + y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # The `types` field of DISubroutineType names the node that lists the
    # argument types
    subroutine = re.search(r'!DISubroutineType\(types:\s+!(\d+)\)', llvm_ir)
    self.assertIsNotNone(subroutine, msg=llvm_ir)
    types_id = subroutine.group(1)

    # That node is expected to hold exactly two metadata node ids
    listed = re.search(
        rf'!{types_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}', llvm_ir
    )
    self.assertIsNotNone(listed, msg=llvm_ir)

    # Each listed node must be a signed DIBasicType named "int32"
    for arg_id in listed.groups():
        pat = rf'!{arg_id}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"'  # noqa: E501
        self.assertIsNotNone(re.search(pat, llvm_ir), msg=llvm_ir)
|
300
|
+
|
220
301
|
|
221
302
|
if __name__ == '__main__':
|
222
303
|
unittest.main()
|
{numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
RENAMED
@@ -205,6 +205,14 @@ int times2(int *out, int a)
|
|
205
205
|
}
|
206
206
|
""")
|
207
207
|
|
208
|
+
times3_cu = cuda.CUSource("""
|
209
|
+
extern "C" __device__
|
210
|
+
int times3(int *out, int a)
|
211
|
+
{
|
212
|
+
*out = a * 3;
|
213
|
+
return 0;
|
214
|
+
}
|
215
|
+
""")
|
208
216
|
|
209
217
|
times4_cu = cuda.CUSource("""
|
210
218
|
extern "C" __device__
|
@@ -351,6 +359,123 @@ class TestDeclareDevice(CUDATestCase):
|
|
351
359
|
kernel[1, 1](x, 1)
|
352
360
|
np.testing.assert_equal(x[0], 323845807)
|
353
361
|
|
362
|
+
def test_declared_in_called_function(self):
    """A kernel calls a device function that calls a declared function."""
    times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)

    @cuda.jit
    def device_func(x):
        return times2(x)

    @cuda.jit
    def kernel(r, x):
        i = cuda.grid(1)
        if i < len(r):
            r[i] = device_func(x[i])

    inputs = np.arange(10, dtype=np.int32)
    results = np.empty_like(inputs)

    kernel[1, 32](results, inputs)

    # The declared function is only reached through device_func
    np.testing.assert_equal(results, inputs * 2)
|
381
|
+
|
382
|
+
def test_declared_in_called_function_twice(self):
    """The declared function sits two device-function calls below the kernel."""
    times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)

    @cuda.jit
    def device_func_1(x):
        return times2(x)

    @cuda.jit
    def device_func_2(x):
        return device_func_1(x)

    @cuda.jit
    def kernel(r, x):
        i = cuda.grid(1)
        if i < len(r):
            r[i] = device_func_2(x[i])

    inputs = np.arange(10, dtype=np.int32)
    results = np.empty_like(inputs)

    kernel[1, 32](results, inputs)

    # kernel -> device_func_2 -> device_func_1 -> times2
    np.testing.assert_equal(results, inputs * 2)
|
405
|
+
|
406
|
+
def test_declared_in_called_function_two_calls(self):
    """The kernel calls the wrapping device function at two call sites."""
    times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)

    @cuda.jit
    def device_func(x):
        return times2(x)

    @cuda.jit
    def kernel(r, x):
        i = cuda.grid(1)
        if i < len(r):
            r[i] = device_func(x[i]) + device_func(x[i] + i)

    inputs = np.arange(10, dtype=np.int32)
    results = np.empty_like(inputs)

    kernel[1, 32](results, inputs)

    # inputs[i] == i, so 2 * i + 2 * (i + i) == 6 * inputs[i]
    np.testing.assert_equal(results, inputs * 6)
|
425
|
+
|
426
|
+
def test_call_declared_function_twice(self):
    """The kernel calls the declared function directly at two call sites."""
    times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)

    @cuda.jit
    def kernel(r, x):
        i = cuda.grid(1)
        if i < len(r):
            r[i] = times2(x[i]) + times2(x[i] + i)

    inputs = np.arange(10, dtype=np.int32)
    results = np.empty_like(inputs)

    kernel[1, 32](results, inputs)

    # inputs[i] == i, so 2 * i + 2 * (i + i) == 6 * inputs[i]
    np.testing.assert_equal(results, inputs * 6)
|
441
|
+
|
442
|
+
def test_declared_in_called_function_and_parent(self):
    """The declared function is called by the kernel and by its callee."""
    times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)

    @cuda.jit
    def device_func(x):
        return times2(x)

    @cuda.jit
    def kernel(r, x):
        i = cuda.grid(1)
        if i < len(r):
            r[i] = device_func(x[i]) + times2(x[i])

    inputs = np.arange(10, dtype=np.int32)
    results = np.empty_like(inputs)

    kernel[1, 32](results, inputs)

    # One doubling through device_func plus one direct doubling
    np.testing.assert_equal(results, inputs * 4)
|
461
|
+
|
462
|
+
def test_call_two_different_declared_functions(self):
    """The kernel calls two declared functions linked from separate sources."""
    times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
    times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)

    @cuda.jit
    def kernel(r, x):
        i = cuda.grid(1)
        if i < len(r):
            r[i] = times2(x[i]) + times3(x[i])

    inputs = np.arange(10, dtype=np.int32)
    results = np.empty_like(inputs)

    kernel[1, 32](results, inputs)

    # 2 * x + 3 * x
    np.testing.assert_equal(results, inputs * 5)
|
478
|
+
|
354
479
|
|
355
480
|
if __name__ == '__main__':
|
356
481
|
unittest.main()
|
@@ -171,7 +171,10 @@ class TestNrtStatistics(CUDATestCase):
|
|
171
171
|
arr = cuda_arange(5 * tmp[0]) # noqa: F841
|
172
172
|
return None
|
173
173
|
|
174
|
-
with
|
174
|
+
with (
|
175
|
+
override_config('CUDA_ENABLE_NRT', True),
|
176
|
+
override_config('CUDA_NRT_STATS', True)
|
177
|
+
):
|
175
178
|
# Switch on stats
|
176
179
|
rtsys.memsys_enable_stats()
|
177
180
|
# check the stats are on
|
@@ -18,7 +18,10 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
18
18
|
super(TestNrtRefCt, self).tearDown()
|
19
19
|
|
20
20
|
def run(self, result=None):
    """Run the test case with NRT and NRT statistics enabled in the config."""
    # Both overrides stay active for the whole run
    with override_config("CUDA_ENABLE_NRT", True):
        with override_config('CUDA_NRT_STATS', True):
            super(TestNrtRefCt, self).run(result)
|
23
26
|
|
24
27
|
def test_no_return(self):
|