numba-cuda 0.0.19__tar.gz → 0.0.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/PKG-INFO +12 -8
- numba_cuda-0.0.20/README.md +40 -0
- numba_cuda-0.0.20/numba_cuda/VERSION +1 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/compiler.py +180 -10
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cuda_paths.py +3 -1
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/dispatcher.py +8 -9
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +2 -4
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +1 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -10
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +1 -2
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -2
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +6 -2
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda.egg-info/PKG-INFO +12 -8
- numba_cuda-0.0.19/README.md +0 -36
- numba_cuda-0.0.19/numba_cuda/VERSION +0 -1
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/LICENSE +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/codegen.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadecl.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/decorators.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/device_init.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/intrinsics.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/stubs.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/target.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda.egg-info/SOURCES.txt +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/pyproject.toml +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/setup.cfg +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/setup.py +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.0.19 → numba_cuda-0.0.20}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.0.19
|
3
|
+
Version: 0.0.20
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -13,17 +13,21 @@ Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
14
14
|
Requires-Dist: numba>=0.59.1
|
15
15
|
|
16
|
+
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|
17
|
+
|
16
18
|
# Numba CUDA Target
|
17
19
|
|
18
|
-
|
20
|
+
The CUDA target for Numba. Please visit the [official
|
21
|
+
documentation](https://nvidia.github.io/numba-cuda) to get started!
|
22
|
+
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
used as the `numba.cuda` module instead of the code from the `numba` package.
|
24
|
+
To report issues or file feature requests, please use the [issue
|
25
|
+
tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
23
26
|
|
24
|
-
|
27
|
+
To raise questions or initiate discussions, please use the [Numba Discourse
|
28
|
+
forum](https://numba.discourse.group).
|
25
29
|
|
26
|
-
## Building
|
30
|
+
## Building from source
|
27
31
|
|
28
32
|
Install as an editable install:
|
29
33
|
|
@@ -31,7 +35,7 @@ Install as an editable install:
|
|
31
35
|
pip install -e .
|
32
36
|
```
|
33
37
|
|
34
|
-
Running tests
|
38
|
+
## Running tests
|
35
39
|
|
36
40
|
```
|
37
41
|
python -m numba.runtests numba.cuda.tests
|
@@ -0,0 +1,40 @@
|
|
1
|
+
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|
2
|
+
|
3
|
+
# Numba CUDA Target
|
4
|
+
|
5
|
+
The CUDA target for Numba. Please visit the [official
|
6
|
+
documentation](https://nvidia.github.io/numba-cuda) to get started!
|
7
|
+
|
8
|
+
|
9
|
+
To report issues or file feature requests, please use the [issue
|
10
|
+
tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
11
|
+
|
12
|
+
To raise questions or initiate discussions, please use the [Numba Discourse
|
13
|
+
forum](https://numba.discourse.group).
|
14
|
+
|
15
|
+
## Building from source
|
16
|
+
|
17
|
+
Install as an editable install:
|
18
|
+
|
19
|
+
```
|
20
|
+
pip install -e .
|
21
|
+
```
|
22
|
+
|
23
|
+
## Running tests
|
24
|
+
|
25
|
+
```
|
26
|
+
python -m numba.runtests numba.cuda.tests
|
27
|
+
```
|
28
|
+
|
29
|
+
This should discover the `numba.cuda` module from the `numba_cuda` package. You
|
30
|
+
can check where `numba.cuda` files are being located by running
|
31
|
+
|
32
|
+
```
|
33
|
+
python -c "from numba import cuda; print(cuda.__file__)"
|
34
|
+
```
|
35
|
+
|
36
|
+
which will show a path like:
|
37
|
+
|
38
|
+
```
|
39
|
+
<path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
|
40
|
+
```
|
@@ -0,0 +1 @@
|
|
1
|
+
0.0.20
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from llvmlite import ir
|
2
2
|
from numba.core.typing.templates import ConcreteTemplate
|
3
|
-
from numba.core import types, typing, funcdesc, config, compiler, sigutils
|
3
|
+
from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
|
4
|
+
sigutils, utils)
|
4
5
|
from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
|
5
6
|
DefaultPassBuilder, Flags, Option,
|
6
7
|
CompileResult)
|
@@ -11,7 +12,10 @@ from numba.core.errors import NumbaInvalidConfigWarning
|
|
11
12
|
from numba.core.typed_passes import (IRLegalization, NativeLowering,
|
12
13
|
AnnotateTypes)
|
13
14
|
from warnings import warn
|
15
|
+
from numba.cuda import nvvmutils
|
14
16
|
from numba.cuda.api import get_current_device
|
17
|
+
from numba.cuda.cudadrv import nvvm
|
18
|
+
from numba.cuda.descriptor import cuda_target
|
15
19
|
from numba.cuda.target import CUDACABICallConv
|
16
20
|
|
17
21
|
|
@@ -24,6 +28,15 @@ def _nvvm_options_type(x):
|
|
24
28
|
return x
|
25
29
|
|
26
30
|
|
31
|
+
def _optional_int_type(x):
|
32
|
+
if x is None:
|
33
|
+
return None
|
34
|
+
|
35
|
+
else:
|
36
|
+
assert isinstance(x, int)
|
37
|
+
return x
|
38
|
+
|
39
|
+
|
27
40
|
class CUDAFlags(Flags):
|
28
41
|
nvvm_options = Option(
|
29
42
|
type=_nvvm_options_type,
|
@@ -35,6 +48,16 @@ class CUDAFlags(Flags):
|
|
35
48
|
default=None,
|
36
49
|
doc="Compute Capability",
|
37
50
|
)
|
51
|
+
max_registers = Option(
|
52
|
+
type=_optional_int_type,
|
53
|
+
default=None,
|
54
|
+
doc="Max registers"
|
55
|
+
)
|
56
|
+
lto = Option(
|
57
|
+
type=bool,
|
58
|
+
default=False,
|
59
|
+
doc="Enable Link-time Optimization"
|
60
|
+
)
|
38
61
|
|
39
62
|
|
40
63
|
# The CUDACompileResult (CCR) has a specially-defined entry point equal to its
|
@@ -109,7 +132,11 @@ class CreateLibrary(LoweringPass):
|
|
109
132
|
codegen = state.targetctx.codegen()
|
110
133
|
name = state.func_id.func_qualname
|
111
134
|
nvvm_options = state.flags.nvvm_options
|
112
|
-
|
135
|
+
max_registers = state.flags.max_registers
|
136
|
+
lto = state.flags.lto
|
137
|
+
state.library = codegen.create_library(name, nvvm_options=nvvm_options,
|
138
|
+
max_registers=max_registers,
|
139
|
+
lto=lto)
|
113
140
|
# Enable object caching upfront so that the library can be serialized.
|
114
141
|
state.library.enable_object_caching()
|
115
142
|
|
@@ -152,7 +179,7 @@ class CUDACompiler(CompilerBase):
|
|
152
179
|
@global_compiler_lock
|
153
180
|
def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False,
|
154
181
|
inline=False, fastmath=False, nvvm_options=None,
|
155
|
-
cc=None):
|
182
|
+
cc=None, max_registers=None, lto=False):
|
156
183
|
if cc is None:
|
157
184
|
raise ValueError('Compute Capability must be supplied')
|
158
185
|
|
@@ -189,6 +216,8 @@ def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False,
|
|
189
216
|
if nvvm_options:
|
190
217
|
flags.nvvm_options = nvvm_options
|
191
218
|
flags.compute_capability = cc
|
219
|
+
flags.max_registers = max_registers
|
220
|
+
flags.lto = lto
|
192
221
|
|
193
222
|
# Run compilation pipeline
|
194
223
|
from numba.core.target_extension import target_override
|
@@ -247,11 +276,155 @@ def cabi_wrap_function(context, lib, fndesc, wrapper_function_name,
|
|
247
276
|
builder, func, restype, argtypes, callargs)
|
248
277
|
builder.ret(return_value)
|
249
278
|
|
279
|
+
if config.DUMP_LLVM:
|
280
|
+
utils.dump_llvm(fndesc, wrapper_module)
|
281
|
+
|
250
282
|
library.add_ir_module(wrapper_module)
|
251
283
|
library.finalize()
|
252
284
|
return library
|
253
285
|
|
254
286
|
|
287
|
+
def kernel_fixup(kernel, debug):
|
288
|
+
if debug:
|
289
|
+
exc_helper = add_exception_store_helper(kernel)
|
290
|
+
|
291
|
+
# Pass 1 - replace:
|
292
|
+
#
|
293
|
+
# ret <value>
|
294
|
+
#
|
295
|
+
# with:
|
296
|
+
#
|
297
|
+
# exc_helper(<value>)
|
298
|
+
# ret void
|
299
|
+
|
300
|
+
for block in kernel.blocks:
|
301
|
+
for i, inst in enumerate(block.instructions):
|
302
|
+
if isinstance(inst, ir.Ret):
|
303
|
+
old_ret = block.instructions.pop()
|
304
|
+
block.terminator = None
|
305
|
+
|
306
|
+
# The original return's metadata will be set on the new
|
307
|
+
# instructions in order to preserve debug info
|
308
|
+
metadata = old_ret.metadata
|
309
|
+
|
310
|
+
builder = ir.IRBuilder(block)
|
311
|
+
if debug:
|
312
|
+
status_code = old_ret.operands[0]
|
313
|
+
exc_helper_call = builder.call(exc_helper, (status_code,))
|
314
|
+
exc_helper_call.metadata = metadata
|
315
|
+
|
316
|
+
new_ret = builder.ret_void()
|
317
|
+
new_ret.metadata = old_ret.metadata
|
318
|
+
|
319
|
+
# Need to break out so we don't carry on modifying what we are
|
320
|
+
# iterating over. There can only be one return in a block
|
321
|
+
# anyway.
|
322
|
+
break
|
323
|
+
|
324
|
+
# Pass 2: remove stores of null pointer to return value argument pointer
|
325
|
+
|
326
|
+
return_value = kernel.args[0]
|
327
|
+
|
328
|
+
for block in kernel.blocks:
|
329
|
+
remove_list = []
|
330
|
+
|
331
|
+
# Find all stores first
|
332
|
+
for inst in block.instructions:
|
333
|
+
if (isinstance(inst, ir.StoreInstr)
|
334
|
+
and inst.operands[1] == return_value):
|
335
|
+
remove_list.append(inst)
|
336
|
+
|
337
|
+
# Remove all stores
|
338
|
+
for to_remove in remove_list:
|
339
|
+
block.instructions.remove(to_remove)
|
340
|
+
|
341
|
+
# Replace non-void return type with void return type and remove return
|
342
|
+
# value
|
343
|
+
|
344
|
+
if isinstance(kernel.type, ir.PointerType):
|
345
|
+
new_type = ir.PointerType(ir.FunctionType(ir.VoidType(),
|
346
|
+
kernel.type.pointee.args[1:]))
|
347
|
+
else:
|
348
|
+
new_type = ir.FunctionType(ir.VoidType(), kernel.type.args[1:])
|
349
|
+
|
350
|
+
kernel.type = new_type
|
351
|
+
kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
|
352
|
+
kernel.args = kernel.args[1:]
|
353
|
+
|
354
|
+
# Mark as a kernel for NVVM
|
355
|
+
|
356
|
+
nvvm.set_cuda_kernel(kernel)
|
357
|
+
|
358
|
+
if config.DUMP_LLVM:
|
359
|
+
print(f"LLVM DUMP: Post kernel fixup {kernel.name}".center(80, '-'))
|
360
|
+
print(kernel.module)
|
361
|
+
print('=' * 80)
|
362
|
+
|
363
|
+
|
364
|
+
def add_exception_store_helper(kernel):
|
365
|
+
|
366
|
+
# Create global variables for exception state
|
367
|
+
|
368
|
+
def define_error_gv(postfix):
|
369
|
+
name = kernel.name + postfix
|
370
|
+
gv = cgutils.add_global_variable(kernel.module, ir.IntType(32),
|
371
|
+
name)
|
372
|
+
gv.initializer = ir.Constant(gv.type.pointee, None)
|
373
|
+
return gv
|
374
|
+
|
375
|
+
gv_exc = define_error_gv("__errcode__")
|
376
|
+
gv_tid = []
|
377
|
+
gv_ctaid = []
|
378
|
+
for i in 'xyz':
|
379
|
+
gv_tid.append(define_error_gv("__tid%s__" % i))
|
380
|
+
gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
|
381
|
+
|
382
|
+
# Create exception store helper function
|
383
|
+
|
384
|
+
helper_name = kernel.name + "__exc_helper__"
|
385
|
+
helper_type = ir.FunctionType(ir.VoidType(), (ir.IntType(32),))
|
386
|
+
helper_func = ir.Function(kernel.module, helper_type, helper_name)
|
387
|
+
|
388
|
+
block = helper_func.append_basic_block(name="entry")
|
389
|
+
builder = ir.IRBuilder(block)
|
390
|
+
|
391
|
+
# Implement status check / exception store logic
|
392
|
+
|
393
|
+
status_code = helper_func.args[0]
|
394
|
+
call_conv = cuda_target.target_context.call_conv
|
395
|
+
status = call_conv._get_return_status(builder, status_code)
|
396
|
+
|
397
|
+
# Check error status
|
398
|
+
with cgutils.if_likely(builder, status.is_ok):
|
399
|
+
builder.ret_void()
|
400
|
+
|
401
|
+
with builder.if_then(builder.not_(status.is_python_exc)):
|
402
|
+
# User exception raised
|
403
|
+
old = ir.Constant(gv_exc.type.pointee, None)
|
404
|
+
|
405
|
+
# Use atomic cmpxchg to prevent rewriting the error status
|
406
|
+
# Only the first error is recorded
|
407
|
+
|
408
|
+
xchg = builder.cmpxchg(gv_exc, old, status.code,
|
409
|
+
'monotonic', 'monotonic')
|
410
|
+
changed = builder.extract_value(xchg, 1)
|
411
|
+
|
412
|
+
# If the xchange is successful, save the thread ID.
|
413
|
+
sreg = nvvmutils.SRegBuilder(builder)
|
414
|
+
with builder.if_then(changed):
|
415
|
+
for dim, ptr, in zip("xyz", gv_tid):
|
416
|
+
val = sreg.tid(dim)
|
417
|
+
builder.store(val, ptr)
|
418
|
+
|
419
|
+
for dim, ptr, in zip("xyz", gv_ctaid):
|
420
|
+
val = sreg.ctaid(dim)
|
421
|
+
builder.store(val, ptr)
|
422
|
+
|
423
|
+
builder.ret_void()
|
424
|
+
|
425
|
+
return helper_func
|
426
|
+
|
427
|
+
|
255
428
|
@global_compiler_lock
|
256
429
|
def compile(pyfunc, sig, debug=None, lineinfo=False, device=True,
|
257
430
|
fastmath=False, cc=None, opt=None, abi="c", abi_info=None,
|
@@ -347,13 +520,10 @@ def compile(pyfunc, sig, debug=None, lineinfo=False, device=True,
|
|
347
520
|
lib = cabi_wrap_function(tgt, lib, cres.fndesc, wrapper_name,
|
348
521
|
nvvm_options)
|
349
522
|
else:
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
lib, kernel = tgt.prepare_cuda_kernel(cres.library, cres.fndesc, debug,
|
355
|
-
lineinfo, nvvm_options, filename,
|
356
|
-
linenum)
|
523
|
+
lib = cres.library
|
524
|
+
kernel = lib.get_function(cres.fndesc.llvm_func_name)
|
525
|
+
lib._entry_name = cres.fndesc.llvm_func_name
|
526
|
+
kernel_fixup(kernel, debug)
|
357
527
|
|
358
528
|
if lto:
|
359
529
|
code = lib.get_ltoir(cc=cc)
|
@@ -310,7 +310,9 @@ def get_conda_include_dir():
|
|
310
310
|
# though usually it shouldn't.
|
311
311
|
include_dir = os.path.join(sys.prefix, 'include')
|
312
312
|
|
313
|
-
if os.path.exists(include_dir)
|
313
|
+
if (os.path.exists(include_dir) and os.path.isdir(include_dir)
|
314
|
+
and os.path.exists(os.path.join(include_dir,
|
315
|
+
'cuda_device_runtime_api.h'))):
|
314
316
|
return include_dir
|
315
317
|
return
|
316
318
|
|
@@ -55,7 +55,7 @@ CUDA_ERROR_INVALID_HANDLE = 400
|
|
55
55
|
CUDA_ERROR_ILLEGAL_STATE = 401
|
56
56
|
CUDA_ERROR_NOT_FOUND = 500
|
57
57
|
CUDA_ERROR_NOT_READY = 600
|
58
|
-
|
58
|
+
CUDA_ERROR_ILLEGAL_ADDRESS = 700
|
59
59
|
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701
|
60
60
|
CUDA_ERROR_LAUNCH_TIMEOUT = 702
|
61
61
|
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703
|
@@ -14,7 +14,7 @@ from numba.core.typing.typeof import Purpose, typeof
|
|
14
14
|
|
15
15
|
from numba.cuda.api import get_current_device
|
16
16
|
from numba.cuda.args import wrap_arg
|
17
|
-
from numba.cuda.compiler import compile_cuda, CUDACompiler
|
17
|
+
from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
|
18
18
|
from numba.cuda.cudadrv import driver
|
19
19
|
from numba.cuda.cudadrv.devices import get_context
|
20
20
|
from numba.cuda.descriptor import cuda_target
|
@@ -102,15 +102,14 @@ class _Kernel(serialize.ReduceMixin):
|
|
102
102
|
inline=inline,
|
103
103
|
fastmath=fastmath,
|
104
104
|
nvvm_options=nvvm_options,
|
105
|
-
cc=cc
|
105
|
+
cc=cc,
|
106
|
+
max_registers=max_registers,
|
107
|
+
lto=lto)
|
106
108
|
tgt_ctx = cres.target_context
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
debug, lineinfo, nvvm_options,
|
112
|
-
filename, linenum,
|
113
|
-
max_registers, lto)
|
109
|
+
lib = cres.library
|
110
|
+
kernel = lib.get_function(cres.fndesc.llvm_func_name)
|
111
|
+
lib._entry_name = cres.fndesc.llvm_func_name
|
112
|
+
kernel_fixup(kernel, self.debug)
|
114
113
|
|
115
114
|
if not link:
|
116
115
|
link = []
|
@@ -48,13 +48,11 @@ class TestDebugOutput(CUDATestCase):
|
|
48
48
|
self.assertRaises(AssertionError, check_meth, out)
|
49
49
|
|
50
50
|
def _check_dump_bytecode(self, out):
|
51
|
-
if PYVERSION
|
51
|
+
if PYVERSION > (3, 10):
|
52
52
|
# binop with arg=0 is binary add, see CPython dis.py and opcode.py
|
53
53
|
self.assertIn('BINARY_OP(arg=0', out)
|
54
|
-
elif PYVERSION in ((3, 9), (3, 10)):
|
55
|
-
self.assertIn('BINARY_ADD', out)
|
56
54
|
else:
|
57
|
-
|
55
|
+
self.assertIn('BINARY_ADD', out)
|
58
56
|
|
59
57
|
def _check_dump_cfg(self, out):
|
60
58
|
self.assertIn('CFG dominators', out)
|
@@ -33,10 +33,7 @@ class TestInspect(CUDATestCase):
|
|
33
33
|
self.assertIn("foo", llvm)
|
34
34
|
|
35
35
|
# Kernel in LLVM
|
36
|
-
self.assertIn(
|
37
|
-
|
38
|
-
# Wrapped device function body in LLVM
|
39
|
-
self.assertIn("define linkonce_odr i32", llvm)
|
36
|
+
self.assertIn("define void @", llvm)
|
40
37
|
|
41
38
|
asm = foo.inspect_asm(sig)
|
42
39
|
|
@@ -72,12 +69,8 @@ class TestInspect(CUDATestCase):
|
|
72
69
|
self.assertIn("foo", llvmirs[float64, float64])
|
73
70
|
|
74
71
|
# Kernels in LLVM
|
75
|
-
self.assertIn(
|
76
|
-
self.assertIn(
|
77
|
-
|
78
|
-
# Wrapped device function bodies in LLVM
|
79
|
-
self.assertIn("define linkonce_odr i32", llvmirs[intp, intp])
|
80
|
-
self.assertIn("define linkonce_odr i32", llvmirs[float64, float64])
|
72
|
+
self.assertIn("define void @", llvmirs[intp, intp])
|
73
|
+
self.assertIn("define void @", llvmirs[float64, float64])
|
81
74
|
|
82
75
|
asmdict = foo.inspect_asm()
|
83
76
|
|
@@ -170,10 +170,9 @@ class TestCudaLineInfo(CUDATestCase):
|
|
170
170
|
subprograms += 1
|
171
171
|
|
172
172
|
# One DISubprogram for each of:
|
173
|
-
# - The kernel wrapper
|
174
173
|
# - The caller
|
175
174
|
# - The callee
|
176
|
-
expected_subprograms =
|
175
|
+
expected_subprograms = 2
|
177
176
|
|
178
177
|
self.assertEqual(subprograms, expected_subprograms,
|
179
178
|
f'"Expected {expected_subprograms} DISubprograms; '
|
{numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py
RENAMED
@@ -14,8 +14,11 @@ def device_func(x, y, z):
|
|
14
14
|
|
15
15
|
|
16
16
|
# Fragments of code that are removed from kernel_func's PTX when optimization
|
17
|
-
# is on
|
18
|
-
|
17
|
+
# is on. Previously this list was longer when kernel wrappers were used - if
|
18
|
+
# the test function were more complex it may be possible to isolate additional
|
19
|
+
# fragments of PTX we could check for the absence / presence of, but removal of
|
20
|
+
# the use of local memory is a good indicator that optimization was applied.
|
21
|
+
removed_by_opt = ( '__local_depot0',)
|
19
22
|
|
20
23
|
|
21
24
|
@skip_on_cudasim('Simulator does not optimize code')
|
{numba_cuda-0.0.19 → numba_cuda-0.0.20}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
from numba.cuda.testing import (skip_on_cudasim, skip_unless_cudasim, unittest,
|
2
2
|
CUDATestCase)
|
3
|
-
from numba import cuda
|
3
|
+
from numba import config, cuda
|
4
4
|
|
5
5
|
# Basic tests that stream APIs execute on the hardware and in the simulator.
|
6
6
|
#
|
@@ -34,7 +34,11 @@ class TestStreamAPI(CUDATestCase):
|
|
34
34
|
# We don't test synchronization on the stream because it's not a real
|
35
35
|
# stream - we used a dummy pointer for testing the API, so we just
|
36
36
|
# ensure that the stream handle matches the external stream pointer.
|
37
|
-
|
37
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
38
|
+
value = int(s.handle)
|
39
|
+
else:
|
40
|
+
value = s.handle.value
|
41
|
+
self.assertEqual(ptr, value)
|
38
42
|
|
39
43
|
@skip_unless_cudasim("External streams are usable with hardware")
|
40
44
|
def test_external_stream_simulator_unavailable(self):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.0.19
|
3
|
+
Version: 0.0.20
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -13,17 +13,21 @@ Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
14
14
|
Requires-Dist: numba>=0.59.1
|
15
15
|
|
16
|
+
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|
17
|
+
|
16
18
|
# Numba CUDA Target
|
17
19
|
|
18
|
-
|
20
|
+
The CUDA target for Numba. Please visit the [official
|
21
|
+
documentation](https://nvidia.github.io/numba-cuda) to get started!
|
22
|
+
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
used as the `numba.cuda` module instead of the code from the `numba` package.
|
24
|
+
To report issues or file feature requests, please use the [issue
|
25
|
+
tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
23
26
|
|
24
|
-
|
27
|
+
To raise questions or initiate discussions, please use the [Numba Discourse
|
28
|
+
forum](https://numba.discourse.group).
|
25
29
|
|
26
|
-
## Building / testing
|
30
|
+
## Building from source
|
27
31
|
|
28
32
|
Install as an editable install:
|
29
33
|
|
@@ -31,7 +35,7 @@ Install as an editable install:
|
|
31
35
|
pip install -e .
|
32
36
|
```
|
33
37
|
|
34
|
-
Running tests:
|
38
|
+
## Running tests
|
35
39
|
|
36
40
|
```
|
37
41
|
python -m numba.runtests numba.cuda.tests
|
numba_cuda-0.0.19/README.md
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
# Numba CUDA Target
|
2
|
-
|
3
|
-
An out-of-tree CUDA target for Numba.
|
4
|
-
|
5
|
-
This contains an entire copy of Numba's CUDA target (the `numba.cuda` module),
|
6
|
-
and a mechanism to ensure the code from this module (`numba_cuda.numba.cuda`) is
|
7
|
-
used as the `numba.cuda` module instead of the code from the `numba` package.
|
8
|
-
|
9
|
-
This is presently in an early state and is published for testing and feedback.
|
10
|
-
|
11
|
-
## Building / testing
|
12
|
-
|
13
|
-
Install as an editable install:
|
14
|
-
|
15
|
-
```
|
16
|
-
pip install -e .
|
17
|
-
```
|
18
|
-
|
19
|
-
Running tests:
|
20
|
-
|
21
|
-
```
|
22
|
-
python -m numba.runtests numba.cuda.tests
|
23
|
-
```
|
24
|
-
|
25
|
-
This should discover the`numba.cuda` module from the `numba_cuda` package. You
|
26
|
-
can check where `numba.cuda` files are being located by running
|
27
|
-
|
28
|
-
```
|
29
|
-
python -c "from numba import cuda; print(cuda.__file__)"
|
30
|
-
```
|
31
|
-
|
32
|
-
which will show a path like:
|
33
|
-
|
34
|
-
```
|
35
|
-
<path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
|
36
|
-
```
|
@@ -1 +0,0 @@
|
|
1
|
-
0.0.19
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|