numba-cuda 0.6.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/PKG-INFO +3 -2
- numba_cuda-0.8.0/numba_cuda/VERSION +1 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/compiler.py +2 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +72 -53
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/dispatcher.py +4 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/runtime/nrt.py +24 -5
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +2 -1
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +11 -2
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +23 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +39 -8
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -5
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda.egg-info/PKG-INFO +3 -2
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda.egg-info/SOURCES.txt +0 -1
- numba_cuda-0.6.0/numba_cuda/VERSION +0 -1
- numba_cuda-0.6.0/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -146
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/LICENSE +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/README.md +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/codegen.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadecl.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/debuginfo.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/decorators.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/device_init.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/stubs.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/target.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/utils.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/pyproject.toml +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/setup.cfg +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/setup.py +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.6.0 → numba_cuda-0.8.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.6.0
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -12,6 +12,7 @@ Requires-Python: >=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
License-File: LICENSE
|
14
14
|
Requires-Dist: numba>=0.59.1
|
15
|
+
Dynamic: license-file
|
15
16
|
|
16
17
|
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|
17
18
|
|
@@ -0,0 +1 @@
|
|
1
|
+
0.8.0
|
@@ -199,12 +199,52 @@ class NVVM(object):
|
|
199
199
|
|
200
200
|
|
201
201
|
class CompilationUnit(object):
|
202
|
-
|
202
|
+
"""
|
203
|
+
A CompilationUnit is a set of LLVM modules that are compiled to PTX or
|
204
|
+
LTO-IR with NVVM.
|
205
|
+
|
206
|
+
Compilation options are accepted as a dict mapping option names to values,
|
207
|
+
with the following considerations:
|
208
|
+
|
209
|
+
- Underscores (`_`) in option names are converted to dashes (`-`), to match
|
210
|
+
NVVM's option name format.
|
211
|
+
- Options that take a value will be emitted in the form "-<name>=<value>".
|
212
|
+
- Booleans passed as option values will be converted to integers.
|
213
|
+
- Options which take no value (such as `-gen-lto`) should have a value of
|
214
|
+
`None` and will be emitted in the form "-<name>".
|
215
|
+
|
216
|
+
For documentation on NVVM compilation options, see the CUDA Toolkit
|
217
|
+
Documentation:
|
218
|
+
|
219
|
+
https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
|
220
|
+
"""
|
221
|
+
|
222
|
+
def __init__(self, options):
|
203
223
|
self.driver = NVVM()
|
204
224
|
self._handle = nvvm_program()
|
205
225
|
err = self.driver.nvvmCreateProgram(byref(self._handle))
|
206
226
|
self.driver.check_error(err, 'Failed to create CU')
|
207
227
|
|
228
|
+
def stringify_option(k, v):
|
229
|
+
k = k.replace('_', '-')
|
230
|
+
|
231
|
+
if v is None:
|
232
|
+
return f'-{k}'.encode('utf-8')
|
233
|
+
|
234
|
+
if isinstance(v, bool):
|
235
|
+
v = int(v)
|
236
|
+
|
237
|
+
return f'-{k}={v}'.encode('utf-8')
|
238
|
+
|
239
|
+
options = [stringify_option(k, v) for k, v in options.items()]
|
240
|
+
option_ptrs = (c_char_p * len(options))(*[c_char_p(x) for x in options])
|
241
|
+
|
242
|
+
# We keep both the options and the pointers to them so that options are
|
243
|
+
# not destroyed before we've used their values
|
244
|
+
self.options = options
|
245
|
+
self.option_ptrs = option_ptrs
|
246
|
+
self.n_options = len(options)
|
247
|
+
|
208
248
|
def __del__(self):
|
209
249
|
driver = NVVM()
|
210
250
|
err = driver.nvvmDestroyProgram(byref(self._handle))
|
@@ -230,60 +270,35 @@ class CompilationUnit(object):
|
|
230
270
|
len(buffer), None)
|
231
271
|
self.driver.check_error(err, 'Failed to add module')
|
232
272
|
|
233
|
-
def
|
234
|
-
"""Perform Compilation.
|
235
|
-
|
236
|
-
Compilation options are accepted as keyword arguments, with the
|
237
|
-
following considerations:
|
238
|
-
|
239
|
-
- Underscores (`_`) in option names are converted to dashes (`-`), to
|
240
|
-
match NVVM's option name format.
|
241
|
-
- Options that take a value will be emitted in the form
|
242
|
-
"-<name>=<value>".
|
243
|
-
- Booleans passed as option values will be converted to integers.
|
244
|
-
- Options which take no value (such as `-gen-lto`) should have a value
|
245
|
-
of `None` passed in and will be emitted in the form "-<name>".
|
246
|
-
|
247
|
-
For documentation on NVVM compilation options, see the CUDA Toolkit
|
248
|
-
Documentation:
|
249
|
-
|
250
|
-
https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
|
273
|
+
def verify(self):
|
251
274
|
"""
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
if v is None:
|
257
|
-
return f'-{k}'
|
258
|
-
|
259
|
-
if isinstance(v, bool):
|
260
|
-
v = int(v)
|
261
|
-
|
262
|
-
return f'-{k}={v}'
|
263
|
-
|
264
|
-
options = [stringify_option(k, v) for k, v in options.items()]
|
265
|
-
|
266
|
-
c_opts = (c_char_p * len(options))(*[c_char_p(x.encode('utf8'))
|
267
|
-
for x in options])
|
268
|
-
# verify
|
269
|
-
err = self.driver.nvvmVerifyProgram(self._handle, len(options), c_opts)
|
275
|
+
Run the NVVM verifier on all code added to the compilation unit.
|
276
|
+
"""
|
277
|
+
err = self.driver.nvvmVerifyProgram(self._handle, self.n_options,
|
278
|
+
self.option_ptrs)
|
270
279
|
self._try_error(err, 'Failed to verify\n')
|
271
280
|
|
272
|
-
|
273
|
-
|
281
|
+
def compile(self):
|
282
|
+
"""
|
283
|
+
Compile all modules added to the compilation unit and return the
|
284
|
+
resulting PTX or LTO-IR (depending on the options).
|
285
|
+
"""
|
286
|
+
err = self.driver.nvvmCompileProgram(self._handle, self.n_options,
|
287
|
+
self.option_ptrs)
|
274
288
|
self._try_error(err, 'Failed to compile\n')
|
275
289
|
|
276
|
-
#
|
277
|
-
|
278
|
-
err = self.driver.nvvmGetCompiledResultSize(self._handle,
|
290
|
+
# Get result
|
291
|
+
result_size = c_size_t()
|
292
|
+
err = self.driver.nvvmGetCompiledResultSize(self._handle,
|
293
|
+
byref(result_size))
|
279
294
|
|
280
295
|
self._try_error(err, 'Failed to get size of compiled result.')
|
281
296
|
|
282
|
-
output_buffer = (c_char *
|
297
|
+
output_buffer = (c_char * result_size.value)()
|
283
298
|
err = self.driver.nvvmGetCompiledResult(self._handle, output_buffer)
|
284
299
|
self._try_error(err, 'Failed to get compiled result.')
|
285
300
|
|
286
|
-
#
|
301
|
+
# Get log
|
287
302
|
self.log = self.get_log()
|
288
303
|
if self.log:
|
289
304
|
warnings.warn(self.log, category=NvvmWarning)
|
@@ -615,40 +630,44 @@ def llvm_replace(llvmir):
|
|
615
630
|
for decl, fn in replacements:
|
616
631
|
llvmir = llvmir.replace(decl, fn)
|
617
632
|
|
618
|
-
llvmir =
|
633
|
+
llvmir = llvm150_to_70_ir(llvmir)
|
619
634
|
|
620
635
|
return llvmir
|
621
636
|
|
622
637
|
|
623
|
-
def compile_ir(llvmir, **
|
638
|
+
def compile_ir(llvmir, **options):
|
624
639
|
if isinstance(llvmir, str):
|
625
640
|
llvmir = [llvmir]
|
626
641
|
|
627
|
-
if
|
628
|
-
|
642
|
+
if options.pop('fastmath', False):
|
643
|
+
options.update({
|
629
644
|
'ftz': True,
|
630
645
|
'fma': True,
|
631
646
|
'prec_div': False,
|
632
647
|
'prec_sqrt': False,
|
633
648
|
})
|
634
649
|
|
635
|
-
cu = CompilationUnit()
|
636
|
-
libdevice = LibDevice()
|
650
|
+
cu = CompilationUnit(options)
|
637
651
|
|
638
652
|
for mod in llvmir:
|
639
653
|
mod = llvm_replace(mod)
|
640
654
|
cu.add_module(mod.encode('utf8'))
|
655
|
+
cu.verify()
|
656
|
+
|
657
|
+
# We add libdevice following verification so that it is not subject to the
|
658
|
+
# verifier's requirements
|
659
|
+
libdevice = LibDevice()
|
641
660
|
cu.lazy_add_module(libdevice.get())
|
642
661
|
|
643
|
-
return cu.compile(
|
662
|
+
return cu.compile()
|
644
663
|
|
645
664
|
|
646
665
|
re_attributes_def = re.compile(r"^attributes #\d+ = \{ ([\w\s]+)\ }")
|
647
666
|
|
648
667
|
|
649
|
-
def
|
668
|
+
def llvm150_to_70_ir(ir):
|
650
669
|
"""
|
651
|
-
Convert LLVM
|
670
|
+
Convert LLVM 15.0 IR for LLVM 7.0.
|
652
671
|
"""
|
653
672
|
buf = []
|
654
673
|
for line in ir.splitlines():
|
@@ -968,6 +968,10 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
|
|
968
968
|
|
969
969
|
A (template, pysig, args, kws) tuple is returned.
|
970
970
|
"""
|
971
|
+
# Fold keyword arguments and resolve default values
|
972
|
+
pysig, args = self._compiler.fold_argument_types(args, kws)
|
973
|
+
kws = {}
|
974
|
+
|
971
975
|
# Ensure an exactly-matching overload is available if we can
|
972
976
|
# compile. We proceed with the typing even if we can't compile
|
973
977
|
# because we may be able to force a cast on the caller side.
|
@@ -5,7 +5,8 @@ import numpy as np
|
|
5
5
|
|
6
6
|
from numba import cuda, config
|
7
7
|
from numba.core.runtime.nrt import _nrt_mstats
|
8
|
-
from numba.cuda.cudadrv.driver import Linker, driver, launch_kernel
|
8
|
+
from numba.cuda.cudadrv.driver import (Linker, driver, launch_kernel,
|
9
|
+
USE_NV_BINDING)
|
9
10
|
from numba.cuda.cudadrv import devices
|
10
11
|
from numba.cuda.api import get_current_device
|
11
12
|
from numba.cuda.utils import _readenv
|
@@ -128,6 +129,18 @@ class _Runtime:
|
|
128
129
|
cooperative=False
|
129
130
|
)
|
130
131
|
|
132
|
+
def _ctypes_pointer(self, array):
|
133
|
+
"""
|
134
|
+
Given an array, return a ctypes pointer to the data suitable for
|
135
|
+
passing to ``launch_kernel``.
|
136
|
+
"""
|
137
|
+
ptr = array.device_ctypes_pointer
|
138
|
+
|
139
|
+
if USE_NV_BINDING:
|
140
|
+
ptr = ctypes.c_void_p(int(ptr))
|
141
|
+
|
142
|
+
return ptr
|
143
|
+
|
131
144
|
def ensure_initialized(self, stream=None):
|
132
145
|
"""
|
133
146
|
If memsys is not initialized, initialize memsys
|
@@ -174,12 +187,13 @@ class _Runtime:
|
|
174
187
|
context
|
175
188
|
"""
|
176
189
|
enabled_ar = cuda.managed_array(1, np.uint8)
|
190
|
+
enabled_ptr = self._ctypes_pointer(enabled_ar)
|
177
191
|
|
178
192
|
self._single_thread_launch(
|
179
193
|
self._memsys_module,
|
180
194
|
stream,
|
181
195
|
"NRT_MemSys_stats_enabled",
|
182
|
-
(
|
196
|
+
(enabled_ptr,)
|
183
197
|
)
|
184
198
|
|
185
199
|
cuda.synchronize()
|
@@ -198,12 +212,13 @@ class _Runtime:
|
|
198
212
|
])
|
199
213
|
|
200
214
|
stats_for_read = cuda.managed_array(1, dt)
|
215
|
+
stats_ptr = self._ctypes_pointer(stats_for_read)
|
201
216
|
|
202
217
|
self._single_thread_launch(
|
203
218
|
self._memsys_module,
|
204
219
|
stream,
|
205
220
|
"NRT_MemSys_read",
|
206
|
-
[
|
221
|
+
[stats_ptr]
|
207
222
|
)
|
208
223
|
cuda.synchronize()
|
209
224
|
|
@@ -231,11 +246,13 @@ class _Runtime:
|
|
231
246
|
Get a single stat from the memsys
|
232
247
|
"""
|
233
248
|
got = cuda.managed_array(1, np.uint64)
|
249
|
+
got_ptr = self._ctypes_pointer(got)
|
250
|
+
|
234
251
|
self._single_thread_launch(
|
235
252
|
self._memsys_module,
|
236
253
|
stream,
|
237
254
|
f"NRT_MemSys_read_{stat}",
|
238
|
-
[
|
255
|
+
[got_ptr]
|
239
256
|
)
|
240
257
|
|
241
258
|
cuda.synchronize()
|
@@ -294,11 +311,13 @@ class _Runtime:
|
|
294
311
|
raise RuntimeError(
|
295
312
|
"Please allocate NRT Memsys first before setting to module.")
|
296
313
|
|
314
|
+
memsys_ptr = self._ctypes_pointer(self._memsys)
|
315
|
+
|
297
316
|
self._single_thread_launch(
|
298
317
|
module,
|
299
318
|
stream,
|
300
319
|
"NRT_MemSys_set",
|
301
|
-
[
|
320
|
+
[memsys_ptr]
|
302
321
|
)
|
303
322
|
|
304
323
|
@_alloc_init_guard
|
@@ -261,7 +261,8 @@ class TestLinker(CUDATestCase):
|
|
261
261
|
|
262
262
|
|
263
263
|
@unittest.skipIf(
|
264
|
-
not PYNVJITLINK_INSTALLED
|
264
|
+
not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
|
265
|
+
reason="pynvjitlink not enabled"
|
265
266
|
)
|
266
267
|
class TestLinkerUsage(CUDATestCase):
|
267
268
|
"""Test that whether pynvjitlink can be enabled by both environment variable
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from numba.tests.support import override_config
|
1
|
+
from numba.tests.support import (override_config, captured_stdout)
|
2
2
|
from numba.cuda.testing import skip_on_cudasim
|
3
3
|
from numba import cuda
|
4
4
|
from numba.core import types
|
@@ -268,7 +268,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
268
268
|
three_device_fns(kernel_debug=False, leaf_debug=True)
|
269
269
|
three_device_fns(kernel_debug=False, leaf_debug=False)
|
270
270
|
|
271
|
-
def
|
271
|
+
def _test_kernel_args_types(self):
|
272
272
|
sig = (types.int32, types.int32)
|
273
273
|
|
274
274
|
@cuda.jit("void(int32, int32)", debug=True, opt=False)
|
@@ -298,6 +298,15 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
298
298
|
match = re.compile(pat).search(llvm_ir)
|
299
299
|
self.assertIsNotNone(match, msg=llvm_ir)
|
300
300
|
|
301
|
+
def test_kernel_args_types(self):
|
302
|
+
self._test_kernel_args_types()
|
303
|
+
|
304
|
+
def test_kernel_args_types_dump(self):
|
305
|
+
# see issue#135
|
306
|
+
with override_config('DUMP_LLVM', 1):
|
307
|
+
with captured_stdout():
|
308
|
+
self._test_kernel_args_types()
|
309
|
+
|
301
310
|
|
302
311
|
if __name__ == '__main__':
|
303
312
|
unittest.main()
|
@@ -56,6 +56,10 @@ def target_overloaded_calls_target_overloaded():
|
|
56
56
|
pass
|
57
57
|
|
58
58
|
|
59
|
+
def default_values_and_kwargs():
|
60
|
+
pass
|
61
|
+
|
62
|
+
|
59
63
|
# To recognise which functions are resolved for a call, we identify each with a
|
60
64
|
# prime number. Each function called multiplies a value by its prime (starting
|
61
65
|
# with the value 1), and we can check that the result is as expected based on
|
@@ -185,6 +189,13 @@ def ol_generic_calls_target_overloaded_cuda(x):
|
|
185
189
|
return impl
|
186
190
|
|
187
191
|
|
192
|
+
@overload(default_values_and_kwargs)
|
193
|
+
def ol_default_values_and_kwargs(out, x, y=5, z=6):
|
194
|
+
def impl(out, x, y=5, z=6):
|
195
|
+
out[0], out[1] = x + y, z
|
196
|
+
return impl
|
197
|
+
|
198
|
+
|
188
199
|
@skip_on_cudasim('Overloading not supported in cudasim')
|
189
200
|
class TestOverload(CUDATestCase):
|
190
201
|
def check_overload(self, kernel, expected):
|
@@ -330,6 +341,18 @@ class TestOverload(CUDATestCase):
|
|
330
341
|
def cuda_target_attr_use(res, dummy):
|
331
342
|
res[0] = dummy.cuda_only
|
332
343
|
|
344
|
+
def test_default_values_and_kwargs(self):
|
345
|
+
"""
|
346
|
+
Test default values and kwargs.
|
347
|
+
"""
|
348
|
+
@cuda.jit()
|
349
|
+
def kernel(a, b, out):
|
350
|
+
default_values_and_kwargs(out, a, z=b)
|
351
|
+
|
352
|
+
out = np.empty(2, dtype=np.int64)
|
353
|
+
kernel[1,1](1, 2, out)
|
354
|
+
self.assertEqual(tuple(out), (6, 2))
|
355
|
+
|
333
356
|
|
334
357
|
if __name__ == '__main__':
|
335
358
|
unittest.main()
|
@@ -5,7 +5,6 @@ import numpy as np
|
|
5
5
|
import unittest
|
6
6
|
from numba.cuda.testing import CUDATestCase
|
7
7
|
|
8
|
-
from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_ones, cuda_arange
|
9
8
|
from numba.tests.support import run_in_subprocess, override_config
|
10
9
|
|
11
10
|
from numba import cuda
|
@@ -24,7 +23,7 @@ class TestNrtBasic(CUDATestCase):
|
|
24
23
|
|
25
24
|
@cuda.jit
|
26
25
|
def g():
|
27
|
-
x =
|
26
|
+
x = np.empty(10, np.int64)
|
28
27
|
f(x)
|
29
28
|
|
30
29
|
g[1,1]()
|
@@ -37,7 +36,7 @@ class TestNrtBasic(CUDATestCase):
|
|
37
36
|
|
38
37
|
@cuda.jit
|
39
38
|
def g():
|
40
|
-
x =
|
39
|
+
x = np.empty(10, np.int64)
|
41
40
|
f(x)
|
42
41
|
|
43
42
|
g[1,1]()
|
@@ -66,7 +65,7 @@ class TestNrtBasic(CUDATestCase):
|
|
66
65
|
|
67
66
|
@cuda.jit
|
68
67
|
def g(out_ary):
|
69
|
-
x =
|
68
|
+
x = np.empty(10, np.int64)
|
70
69
|
x[5] = 1
|
71
70
|
y = f(x)
|
72
71
|
out_ary[0] = y[0]
|
@@ -97,11 +96,11 @@ class TestNrtStatistics(CUDATestCase):
|
|
97
96
|
src = """if 1:
|
98
97
|
from numba import cuda
|
99
98
|
from numba.cuda.runtime import rtsys
|
100
|
-
|
99
|
+
import numpy as np
|
101
100
|
|
102
101
|
@cuda.jit
|
103
102
|
def foo():
|
104
|
-
x =
|
103
|
+
x = np.arange(10)[0]
|
105
104
|
|
106
105
|
# initialize the NRT before use
|
107
106
|
rtsys.initialize()
|
@@ -167,8 +166,8 @@ class TestNrtStatistics(CUDATestCase):
|
|
167
166
|
|
168
167
|
@cuda.jit
|
169
168
|
def foo():
|
170
|
-
tmp =
|
171
|
-
arr =
|
169
|
+
tmp = np.ones(3)
|
170
|
+
arr = np.arange(5 * tmp[0]) # noqa: F841
|
172
171
|
return None
|
173
172
|
|
174
173
|
with (
|
@@ -230,6 +229,38 @@ class TestNrtStatistics(CUDATestCase):
|
|
230
229
|
stats_func()
|
231
230
|
self.assertIn("NRT stats are disabled.", str(raises.exception))
|
232
231
|
|
232
|
+
def test_read_one_stat(self):
|
233
|
+
@cuda.jit
|
234
|
+
def foo():
|
235
|
+
tmp = np.ones(3)
|
236
|
+
arr = np.arange(5 * tmp[0]) # noqa: F841
|
237
|
+
return None
|
238
|
+
|
239
|
+
with (
|
240
|
+
override_config('CUDA_ENABLE_NRT', True),
|
241
|
+
override_config('CUDA_NRT_STATS', True)
|
242
|
+
):
|
243
|
+
|
244
|
+
# Switch on stats
|
245
|
+
rtsys.memsys_enable_stats()
|
246
|
+
|
247
|
+
# Launch the kernel a couple of times to increase stats
|
248
|
+
foo[1, 1]()
|
249
|
+
foo[1, 1]()
|
250
|
+
|
251
|
+
# Get stats struct and individual stats
|
252
|
+
stats = rtsys.get_allocation_stats()
|
253
|
+
stats_alloc = rtsys.memsys_get_stats_alloc()
|
254
|
+
stats_mi_alloc = rtsys.memsys_get_stats_mi_alloc()
|
255
|
+
stats_free = rtsys.memsys_get_stats_free()
|
256
|
+
stats_mi_free = rtsys.memsys_get_stats_mi_free()
|
257
|
+
|
258
|
+
# Check individual stats match stats struct
|
259
|
+
self.assertEqual(stats.alloc, stats_alloc)
|
260
|
+
self.assertEqual(stats.mi_alloc, stats_mi_alloc)
|
261
|
+
self.assertEqual(stats.free, stats_free)
|
262
|
+
self.assertEqual(stats.mi_free, stats_mi_free)
|
263
|
+
|
233
264
|
|
234
265
|
if __name__ == '__main__':
|
235
266
|
unittest.main()
|
@@ -4,7 +4,6 @@ from numba.tests.support import override_config
|
|
4
4
|
from numba.cuda.runtime import rtsys
|
5
5
|
from numba.cuda.tests.support import EnableNRTStatsMixin
|
6
6
|
from numba.cuda.testing import CUDATestCase
|
7
|
-
from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_empty_like
|
8
7
|
|
9
8
|
from numba import cuda
|
10
9
|
|
@@ -34,7 +33,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
34
33
|
@cuda.jit
|
35
34
|
def kernel():
|
36
35
|
for i in range(n):
|
37
|
-
temp =
|
36
|
+
temp = np.empty(2) # noqa: F841
|
38
37
|
return None
|
39
38
|
|
40
39
|
init_stats = rtsys.get_allocation_stats()
|
@@ -51,7 +50,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
51
50
|
@cuda.jit
|
52
51
|
def g(n):
|
53
52
|
|
54
|
-
x =
|
53
|
+
x = np.empty((n, 2))
|
55
54
|
|
56
55
|
for i in range(n):
|
57
56
|
y = x[i]
|
@@ -73,13 +72,13 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
73
72
|
"""
|
74
73
|
@cuda.jit
|
75
74
|
def if_with_allocation_and_initialization(arr1, test1):
|
76
|
-
tmp_arr =
|
75
|
+
tmp_arr = np.empty_like(arr1)
|
77
76
|
|
78
77
|
for i in range(tmp_arr.shape[0]):
|
79
78
|
pass
|
80
79
|
|
81
80
|
if test1:
|
82
|
-
|
81
|
+
np.empty_like(arr1)
|
83
82
|
|
84
83
|
arr = np.random.random((5, 5)) # the values are not consumed
|
85
84
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -12,6 +12,7 @@ Requires-Python: >=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
License-File: LICENSE
|
14
14
|
Requires-Dist: numba>=0.59.1
|
15
|
+
Dynamic: license-file
|
15
16
|
|
16
17
|
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|
17
18
|
|
@@ -245,7 +245,6 @@ numba_cuda/numba/cuda/tests/nocuda/test_import.py
|
|
245
245
|
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py
|
246
246
|
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py
|
247
247
|
numba_cuda/numba/cuda/tests/nrt/__init__.py
|
248
|
-
numba_cuda/numba/cuda/tests/nrt/mock_numpy.py
|
249
248
|
numba_cuda/numba/cuda/tests/nrt/test_nrt.py
|
250
249
|
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py
|
251
250
|
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile
|
@@ -1 +0,0 @@
|
|
1
|
-
0.6.0
|