numba-cuda 0.4.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/PKG-INFO +20 -2
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/README.md +19 -1
- numba_cuda-0.6.0/numba_cuda/VERSION +1 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/compiler.py +85 -8
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadecl.py +6 -2
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +13 -9
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -1
- numba_cuda-0.6.0/numba_cuda/numba/cuda/debuginfo.py +44 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/decorators.py +9 -2
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/dispatcher.py +62 -4
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/target.py +4 -134
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/testing.py +11 -1
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +81 -0
- numba_cuda-0.6.0/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +481 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +10 -7
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +4 -1
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/PKG-INFO +20 -2
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/SOURCES.txt +1 -0
- numba_cuda-0.4.0/numba_cuda/VERSION +0 -1
- numba_cuda-0.4.0/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -222
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/LICENSE +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/codegen.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/device_init.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/stubs.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/utils.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/pyproject.toml +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/setup.cfg +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/setup.py +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.4.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.4.0
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
|
27
27
|
To raise questions or initiate discussions, please use the [Numba Discourse
|
28
28
|
forum](https://numba.discourse.group).
|
29
29
|
|
30
|
-
##
|
30
|
+
## Installation with pip
|
31
|
+
|
32
|
+
```shell
|
33
|
+
pip install numba-cuda
|
34
|
+
```
|
35
|
+
|
36
|
+
## Installation with Conda
|
37
|
+
|
38
|
+
```shell
|
39
|
+
conda install -c conda-forge numba-cuda
|
40
|
+
```
|
41
|
+
|
42
|
+
## Installation from source
|
31
43
|
|
32
44
|
Install as an editable install:
|
33
45
|
|
@@ -53,3 +65,9 @@ which will show a path like:
|
|
53
65
|
```
|
54
66
|
<path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
|
55
67
|
```
|
68
|
+
|
69
|
+
## Contributing Guide
|
70
|
+
|
71
|
+
Review the
|
72
|
+
[CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
|
73
|
+
file for information on how to contribute code and issues to the project.
|
@@ -12,7 +12,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
|
12
12
|
To raise questions or initiate discussions, please use the [Numba Discourse
|
13
13
|
forum](https://numba.discourse.group).
|
14
14
|
|
15
|
-
##
|
15
|
+
## Installation with pip
|
16
|
+
|
17
|
+
```shell
|
18
|
+
pip install numba-cuda
|
19
|
+
```
|
20
|
+
|
21
|
+
## Installation with Conda
|
22
|
+
|
23
|
+
```shell
|
24
|
+
conda install -c conda-forge numba-cuda
|
25
|
+
```
|
26
|
+
|
27
|
+
## Installation from source
|
16
28
|
|
17
29
|
Install as an editable install:
|
18
30
|
|
@@ -38,3 +50,9 @@ which will show a path like:
|
|
38
50
|
```
|
39
51
|
<path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
|
40
52
|
```
|
53
|
+
|
54
|
+
## Contributing Guide
|
55
|
+
|
56
|
+
Review the
|
57
|
+
[CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
|
58
|
+
file for information on how to contribute code and issues to the project.
|
@@ -0,0 +1 @@
|
|
1
|
+
0.6.0
|
@@ -1,14 +1,17 @@
|
|
1
1
|
from llvmlite import ir
|
2
2
|
from numba.core.typing.templates import ConcreteTemplate
|
3
|
+
from numba.core import ir as numba_ir
|
3
4
|
from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
|
4
5
|
sigutils, utils)
|
5
6
|
from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
|
6
7
|
DefaultPassBuilder, Flags, Option,
|
7
8
|
CompileResult)
|
8
9
|
from numba.core.compiler_lock import global_compiler_lock
|
9
|
-
from numba.core.compiler_machinery import (LoweringPass,
|
10
|
+
from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
|
10
11
|
PassManager, register_pass)
|
12
|
+
from numba.core.interpreter import Interpreter
|
11
13
|
from numba.core.errors import NumbaInvalidConfigWarning
|
14
|
+
from numba.core.untyped_passes import TranslateByteCode
|
12
15
|
from numba.core.typed_passes import (IRLegalization, NativeLowering,
|
13
16
|
AnnotateTypes)
|
14
17
|
from warnings import warn
|
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
|
|
143
146
|
return True
|
144
147
|
|
145
148
|
|
149
|
+
class CUDABytecodeInterpreter(Interpreter):
|
150
|
+
# Based on the superclass implementation, but names the resulting variable
|
151
|
+
# "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
|
152
|
+
# https://github.com/numba/numba/pull/9888
|
153
|
+
#
|
154
|
+
# This can be removed once that PR is available in an upstream Numba
|
155
|
+
# release.
|
156
|
+
def _op_JUMP_IF(self, inst, pred, iftrue):
|
157
|
+
brs = {
|
158
|
+
True: inst.get_jump_target(),
|
159
|
+
False: inst.next,
|
160
|
+
}
|
161
|
+
truebr = brs[iftrue]
|
162
|
+
falsebr = brs[not iftrue]
|
163
|
+
|
164
|
+
name = "$bool%s" % (inst.offset)
|
165
|
+
gv_fn = numba_ir.Global("bool", bool, loc=self.loc)
|
166
|
+
self.store(value=gv_fn, name=name)
|
167
|
+
|
168
|
+
callres = numba_ir.Expr.call(self.get(name), (self.get(pred),), (),
|
169
|
+
loc=self.loc)
|
170
|
+
|
171
|
+
pname = "$%spred" % (inst.offset)
|
172
|
+
predicate = self.store(value=callres, name=pname)
|
173
|
+
bra = numba_ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
|
174
|
+
loc=self.loc)
|
175
|
+
self.current_block.append(bra)
|
176
|
+
|
177
|
+
|
178
|
+
@register_pass(mutates_CFG=True, analysis_only=False)
|
179
|
+
class CUDATranslateBytecode(FunctionPass):
|
180
|
+
_name = "cuda_translate_bytecode"
|
181
|
+
|
182
|
+
def __init__(self):
|
183
|
+
FunctionPass.__init__(self)
|
184
|
+
|
185
|
+
def run_pass(self, state):
|
186
|
+
func_id = state['func_id']
|
187
|
+
bc = state['bc']
|
188
|
+
interp = CUDABytecodeInterpreter(func_id)
|
189
|
+
func_ir = interp.interpret(bc)
|
190
|
+
state['func_ir'] = func_ir
|
191
|
+
return True
|
192
|
+
|
193
|
+
|
146
194
|
class CUDACompiler(CompilerBase):
|
147
195
|
def define_pipelines(self):
|
148
196
|
dpb = DefaultPassBuilder
|
149
197
|
pm = PassManager('cuda')
|
150
198
|
|
151
199
|
untyped_passes = dpb.define_untyped_pipeline(self.state)
|
152
|
-
|
200
|
+
|
201
|
+
# Rather than replicating the whole untyped passes definition in
|
202
|
+
# numba-cuda, it seems cleaner to take the pass list and replace the
|
203
|
+
# TranslateBytecode pass with our own.
|
204
|
+
|
205
|
+
def replace_translate_pass(implementation, description):
|
206
|
+
if implementation is TranslateByteCode:
|
207
|
+
return (CUDATranslateBytecode, description)
|
208
|
+
else:
|
209
|
+
return (implementation, description)
|
210
|
+
|
211
|
+
cuda_untyped_passes = [
|
212
|
+
replace_translate_pass(implementation, description)
|
213
|
+
for implementation, description in untyped_passes.passes
|
214
|
+
]
|
215
|
+
|
216
|
+
pm.passes.extend(cuda_untyped_passes)
|
153
217
|
|
154
218
|
typed_passes = dpb.define_typed_pipeline(self.state)
|
155
219
|
pm.passes.extend(typed_passes.passes)
|
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
|
|
352
416
|
kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
|
353
417
|
kernel.args = kernel.args[1:]
|
354
418
|
|
419
|
+
# If debug metadata is present, remove the return value from it
|
420
|
+
|
421
|
+
if kernel_metadata := getattr(kernel, 'metadata', None):
|
422
|
+
if dbg_metadata := kernel_metadata.get('dbg', None):
|
423
|
+
for name, value in dbg_metadata.operands:
|
424
|
+
if name == "type":
|
425
|
+
type_metadata = value
|
426
|
+
for tm_name, tm_value in type_metadata.operands:
|
427
|
+
if tm_name == 'types':
|
428
|
+
types = tm_value
|
429
|
+
types.operands = types.operands[1:]
|
430
|
+
|
355
431
|
# Mark as a kernel for NVVM
|
356
432
|
|
357
433
|
nvvm.set_cuda_kernel(kernel)
|
@@ -570,16 +646,16 @@ def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
|
|
570
646
|
abi=abi, abi_info=abi_info)
|
571
647
|
|
572
648
|
|
573
|
-
def declare_device_function(name, restype, argtypes):
|
574
|
-
return declare_device_function_template(name, restype, argtypes).key
|
649
|
+
def declare_device_function(name, restype, argtypes, link):
|
650
|
+
return declare_device_function_template(name, restype, argtypes, link).key
|
575
651
|
|
576
652
|
|
577
|
-
def declare_device_function_template(name, restype, argtypes):
|
653
|
+
def declare_device_function_template(name, restype, argtypes, link):
|
578
654
|
from .descriptor import cuda_target
|
579
655
|
typingctx = cuda_target.typing_context
|
580
656
|
targetctx = cuda_target.target_context
|
581
657
|
sig = typing.signature(restype, *argtypes)
|
582
|
-
extfn = ExternFunction(name, sig)
|
658
|
+
extfn = ExternFunction(name, sig, link)
|
583
659
|
|
584
660
|
class device_function_template(ConcreteTemplate):
|
585
661
|
key = extfn
|
@@ -593,7 +669,8 @@ def declare_device_function_template(name, restype, argtypes):
|
|
593
669
|
return device_function_template
|
594
670
|
|
595
671
|
|
596
|
-
class ExternFunction
|
597
|
-
def __init__(self, name, sig):
|
672
|
+
class ExternFunction:
|
673
|
+
def __init__(self, name, sig, link):
|
598
674
|
self.name = name
|
599
675
|
self.sig = sig
|
676
|
+
self.link = link
|
@@ -403,16 +403,20 @@ _genfp16_binary_operator(operator.itruediv)
|
|
403
403
|
|
404
404
|
|
405
405
|
def _resolve_wrapped_unary(fname):
|
406
|
+
link = tuple()
|
406
407
|
decl = declare_device_function_template(f'__numba_wrapper_{fname}',
|
407
408
|
types.float16,
|
408
|
-
(types.float16,)
|
409
|
+
(types.float16,),
|
410
|
+
link)
|
409
411
|
return types.Function(decl)
|
410
412
|
|
411
413
|
|
412
414
|
def _resolve_wrapped_binary(fname):
|
415
|
+
link = tuple()
|
413
416
|
decl = declare_device_function_template(f'__numba_wrapper_{fname}',
|
414
417
|
types.float16,
|
415
|
-
(types.float16, types.float16,)
|
418
|
+
(types.float16, types.float16,),
|
419
|
+
link)
|
416
420
|
return types.Function(decl)
|
417
421
|
|
418
422
|
|
@@ -2,8 +2,12 @@ from .mappings import FILE_EXTENSION_MAP
|
|
2
2
|
|
3
3
|
|
4
4
|
class LinkableCode:
|
5
|
-
"""An object that
|
6
|
-
|
5
|
+
"""An object that holds code to be linked from memory.
|
6
|
+
|
7
|
+
:param data: A buffer containing the data to link.
|
8
|
+
:param name: The name of the file to be referenced in any compilation or
|
9
|
+
linking errors that may be produced.
|
10
|
+
"""
|
7
11
|
|
8
12
|
def __init__(self, data, name=None):
|
9
13
|
self.data = data
|
@@ -15,49 +19,49 @@ class LinkableCode:
|
|
15
19
|
|
16
20
|
|
17
21
|
class PTXSource(LinkableCode):
|
18
|
-
"""PTX
|
22
|
+
"""PTX source code in memory."""
|
19
23
|
|
20
24
|
kind = FILE_EXTENSION_MAP["ptx"]
|
21
25
|
default_name = "<unnamed-ptx>"
|
22
26
|
|
23
27
|
|
24
28
|
class CUSource(LinkableCode):
|
25
|
-
"""CUDA C/C++
|
29
|
+
"""CUDA C/C++ source code in memory."""
|
26
30
|
|
27
31
|
kind = "cu"
|
28
32
|
default_name = "<unnamed-cu>"
|
29
33
|
|
30
34
|
|
31
35
|
class Fatbin(LinkableCode):
|
32
|
-
"""
|
36
|
+
"""An ELF Fatbin in memory."""
|
33
37
|
|
34
38
|
kind = FILE_EXTENSION_MAP["fatbin"]
|
35
39
|
default_name = "<unnamed-fatbin>"
|
36
40
|
|
37
41
|
|
38
42
|
class Cubin(LinkableCode):
|
39
|
-
"""
|
43
|
+
"""An ELF Cubin in memory."""
|
40
44
|
|
41
45
|
kind = FILE_EXTENSION_MAP["cubin"]
|
42
46
|
default_name = "<unnamed-cubin>"
|
43
47
|
|
44
48
|
|
45
49
|
class Archive(LinkableCode):
|
46
|
-
"""An archive of objects in memory"""
|
50
|
+
"""An archive of objects in memory."""
|
47
51
|
|
48
52
|
kind = FILE_EXTENSION_MAP["a"]
|
49
53
|
default_name = "<unnamed-archive>"
|
50
54
|
|
51
55
|
|
52
56
|
class Object(LinkableCode):
|
53
|
-
"""An object file in memory"""
|
57
|
+
"""An object file in memory."""
|
54
58
|
|
55
59
|
kind = FILE_EXTENSION_MAP["o"]
|
56
60
|
default_name = "<unnamed-object>"
|
57
61
|
|
58
62
|
|
59
63
|
class LTOIR(LinkableCode):
|
60
|
-
"""An LTOIR file in memory"""
|
64
|
+
"""An LTOIR file in memory."""
|
61
65
|
|
62
66
|
kind = "ltoir"
|
63
67
|
default_name = "<unnamed-ltoir>"
|
@@ -314,7 +314,9 @@ COMPUTE_CAPABILITIES = (
|
|
314
314
|
(6, 0), (6, 1), (6, 2),
|
315
315
|
(7, 0), (7, 2), (7, 5),
|
316
316
|
(8, 0), (8, 6), (8, 7), (8, 9),
|
317
|
-
(9, 0)
|
317
|
+
(9, 0),
|
318
|
+
(10, 0), (10, 1),
|
319
|
+
(12, 0),
|
318
320
|
)
|
319
321
|
|
320
322
|
# Maps CTK version -> (min supported cc, max supported cc) inclusive
|
@@ -331,6 +333,9 @@ CTK_SUPPORTED = {
|
|
331
333
|
(12, 2): ((5, 0), (9, 0)),
|
332
334
|
(12, 3): ((5, 0), (9, 0)),
|
333
335
|
(12, 4): ((5, 0), (9, 0)),
|
336
|
+
(12, 5): ((5, 0), (9, 0)),
|
337
|
+
(12, 6): ((5, 0), (9, 0)),
|
338
|
+
(12, 8): ((5, 0), (12, 0)),
|
334
339
|
}
|
335
340
|
|
336
341
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from llvmlite import ir
|
2
|
+
from numba.core import types
|
3
|
+
from numba.core.debuginfo import DIBuilder
|
4
|
+
from numba.cuda.types import GridGroup
|
5
|
+
|
6
|
+
_BYTE_SIZE = 8
|
7
|
+
|
8
|
+
|
9
|
+
class CUDADIBuilder(DIBuilder):
|
10
|
+
|
11
|
+
def _var_type(self, lltype, size, datamodel=None):
|
12
|
+
is_bool = False
|
13
|
+
is_grid_group = False
|
14
|
+
|
15
|
+
if isinstance(lltype, ir.IntType):
|
16
|
+
if datamodel is None:
|
17
|
+
if size == 1:
|
18
|
+
name = str(lltype)
|
19
|
+
is_bool = True
|
20
|
+
else:
|
21
|
+
name = str(datamodel.fe_type)
|
22
|
+
if isinstance(datamodel.fe_type, types.Boolean):
|
23
|
+
is_bool = True
|
24
|
+
elif isinstance(datamodel.fe_type, GridGroup):
|
25
|
+
is_grid_group = True
|
26
|
+
|
27
|
+
if is_bool or is_grid_group:
|
28
|
+
m = self.module
|
29
|
+
bitsize = _BYTE_SIZE * size
|
30
|
+
# Boolean type workaround until upstream Numba is fixed
|
31
|
+
if is_bool:
|
32
|
+
ditok = "DW_ATE_boolean"
|
33
|
+
# GridGroup type should use numba.cuda implementation
|
34
|
+
elif is_grid_group:
|
35
|
+
ditok = "DW_ATE_unsigned"
|
36
|
+
|
37
|
+
return m.add_debug_info('DIBasicType', {
|
38
|
+
'name': name,
|
39
|
+
'size': bitsize,
|
40
|
+
'encoding': ir.DIToken(ditok),
|
41
|
+
})
|
42
|
+
|
43
|
+
# For other cases, use upstream Numba implementation
|
44
|
+
return super()._var_type(lltype, size, datamodel=datamodel)
|
@@ -173,7 +173,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
173
173
|
return disp
|
174
174
|
|
175
175
|
|
176
|
-
def declare_device(name, sig):
|
176
|
+
def declare_device(name, sig, link=None):
|
177
177
|
"""
|
178
178
|
Declare the signature of a foreign function. Returns a descriptor that can
|
179
179
|
be used to call the function from a Python kernel.
|
@@ -181,10 +181,17 @@ def declare_device(name, sig):
|
|
181
181
|
:param name: The name of the foreign function.
|
182
182
|
:type name: str
|
183
183
|
:param sig: The Numba signature of the function.
|
184
|
+
:param link: External code to link when calling the function.
|
184
185
|
"""
|
186
|
+
if link is None:
|
187
|
+
link = tuple()
|
188
|
+
else:
|
189
|
+
if not isinstance(link, (list, tuple, set)):
|
190
|
+
link = (link,)
|
191
|
+
|
185
192
|
argtypes, restype = sigutils.normalize_signature(sig)
|
186
193
|
if restype is None:
|
187
194
|
msg = 'Return type must be provided for device declarations'
|
188
195
|
raise TypeError(msg)
|
189
196
|
|
190
|
-
return declare_device_function(name, restype, argtypes)
|
197
|
+
return declare_device_function(name, restype, argtypes, link)
|
@@ -4,17 +4,19 @@ import re
|
|
4
4
|
import sys
|
5
5
|
import ctypes
|
6
6
|
import functools
|
7
|
+
from collections import defaultdict
|
7
8
|
|
8
|
-
from numba.core import config, serialize, sigutils, types, typing, utils
|
9
|
+
from numba.core import config, ir, serialize, sigutils, types, typing, utils
|
9
10
|
from numba.core.caching import Cache, CacheImpl
|
10
11
|
from numba.core.compiler_lock import global_compiler_lock
|
11
12
|
from numba.core.dispatcher import Dispatcher
|
12
13
|
from numba.core.errors import NumbaPerformanceWarning
|
13
14
|
from numba.core.typing.typeof import Purpose, typeof
|
14
|
-
|
15
|
+
from numba.core.types.functions import Function
|
15
16
|
from numba.cuda.api import get_current_device
|
16
17
|
from numba.cuda.args import wrap_arg
|
17
|
-
from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
|
18
|
+
from numba.cuda.compiler import (compile_cuda, CUDACompiler, kernel_fixup,
|
19
|
+
ExternFunction)
|
18
20
|
from numba.cuda.cudadrv import driver
|
19
21
|
from numba.cuda.cudadrv.devices import get_context
|
20
22
|
from numba.cuda.descriptor import cuda_target
|
@@ -41,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
|
|
41
43
|
reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
|
42
44
|
|
43
45
|
|
46
|
+
def get_cres_link_objects(cres):
|
47
|
+
"""Given a compile result, return a set of all linkable code objects that
|
48
|
+
are required for it to be fully linked."""
|
49
|
+
|
50
|
+
link_objects = set()
|
51
|
+
|
52
|
+
# List of calls into declared device functions
|
53
|
+
device_func_calls = [
|
54
|
+
(name, v) for name, v in cres.fndesc.typemap.items() if (
|
55
|
+
isinstance(v, cuda_types.CUDADispatcher)
|
56
|
+
)
|
57
|
+
]
|
58
|
+
|
59
|
+
# List of tuples with SSA name of calls and corresponding signature
|
60
|
+
call_signatures = [
|
61
|
+
(call.func.name, sig)
|
62
|
+
for call, sig in cres.fndesc.calltypes.items() if (
|
63
|
+
isinstance(call, ir.Expr) and call.op == 'call'
|
64
|
+
)
|
65
|
+
]
|
66
|
+
|
67
|
+
# Map SSA names to all invoked signatures
|
68
|
+
call_signature_d = defaultdict(list)
|
69
|
+
for name, sig in call_signatures:
|
70
|
+
call_signature_d[name].append(sig)
|
71
|
+
|
72
|
+
# Add the link objects from the current function's callees
|
73
|
+
for name, v in device_func_calls:
|
74
|
+
for sig in call_signature_d.get(name, []):
|
75
|
+
called_cres = v.dispatcher.overloads[sig.args]
|
76
|
+
called_link_objects = get_cres_link_objects(called_cres)
|
77
|
+
link_objects.update(called_link_objects)
|
78
|
+
|
79
|
+
# From this point onwards, we are only interested in ExternFunction
|
80
|
+
# declarations - these are the calls made directly in this function to
|
81
|
+
# them.
|
82
|
+
for name, v in cres.fndesc.typemap.items():
|
83
|
+
if not isinstance(v, Function):
|
84
|
+
continue
|
85
|
+
|
86
|
+
if not isinstance(v.typing_key, ExternFunction):
|
87
|
+
continue
|
88
|
+
|
89
|
+
for obj in v.typing_key.link:
|
90
|
+
link_objects.add(obj)
|
91
|
+
|
92
|
+
return link_objects
|
93
|
+
|
94
|
+
|
44
95
|
class _Kernel(serialize.ReduceMixin):
|
45
96
|
'''
|
46
97
|
CUDA Kernel specialized for a given set of argument types. When called, this
|
@@ -158,6 +209,9 @@ class _Kernel(serialize.ReduceMixin):
|
|
158
209
|
|
159
210
|
self.maybe_link_nrt(link, tgt_ctx, asm)
|
160
211
|
|
212
|
+
for obj in get_cres_link_objects(cres):
|
213
|
+
lib.add_linking_file(obj)
|
214
|
+
|
161
215
|
for filepath in link:
|
162
216
|
lib.add_linking_file(filepath)
|
163
217
|
|
@@ -256,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
|
|
256
310
|
"""
|
257
311
|
cufunc = self._codelibrary.get_cufunc()
|
258
312
|
|
259
|
-
if
|
313
|
+
if (
|
314
|
+
hasattr(self, "target_context")
|
315
|
+
and self.target_context.enable_nrt
|
316
|
+
and config.CUDA_NRT_STATS
|
317
|
+
):
|
260
318
|
rtsys.ensure_initialized()
|
261
319
|
rtsys.set_memsys_to_module(cufunc.module)
|
262
320
|
# We don't know which stream the kernel will be launched on, so
|
@@ -3,8 +3,7 @@ from functools import cached_property
|
|
3
3
|
import llvmlite.binding as ll
|
4
4
|
from llvmlite import ir
|
5
5
|
|
6
|
-
from numba.core import
|
7
|
-
typing, utils)
|
6
|
+
from numba.core import cgutils, config, itanium_mangler, types, typing
|
8
7
|
from numba.core.dispatcher import Dispatcher
|
9
8
|
from numba.core.base import BaseContext
|
10
9
|
from numba.core.callconv import BaseCallConv, MinimalCallConv
|
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
|
|
12
11
|
from numba.core import datamodel
|
13
12
|
|
14
13
|
from .cudadrv import nvvm
|
15
|
-
from numba.cuda import codegen,
|
14
|
+
from numba.cuda import codegen, ufuncs
|
15
|
+
from numba.cuda.debuginfo import CUDADIBuilder
|
16
16
|
from numba.cuda.models import cuda_data_manager
|
17
17
|
|
18
18
|
# -----------------------------------------------------------------------------
|
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):
|
|
80
80
|
|
81
81
|
@property
|
82
82
|
def DIBuilder(self):
|
83
|
-
return
|
83
|
+
return CUDADIBuilder
|
84
84
|
|
85
85
|
@property
|
86
86
|
def enable_boundscheck(self):
|
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
|
|
150
150
|
return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
|
151
151
|
uid=uid)
|
152
152
|
|
153
|
-
def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
|
154
|
-
nvvm_options, filename, linenum,
|
155
|
-
max_registers=None, lto=False):
|
156
|
-
"""
|
157
|
-
Adapt a code library ``codelib`` with the numba compiled CUDA kernel
|
158
|
-
with name ``fname`` and arguments ``argtypes`` for NVVM.
|
159
|
-
A new library is created with a wrapper function that can be used as
|
160
|
-
the kernel entry point for the given kernel.
|
161
|
-
|
162
|
-
Returns the new code library and the wrapper function.
|
163
|
-
|
164
|
-
Parameters:
|
165
|
-
|
166
|
-
codelib: The CodeLibrary containing the device function to wrap
|
167
|
-
in a kernel call.
|
168
|
-
fndesc: The FunctionDescriptor of the source function.
|
169
|
-
debug: Whether to compile with debug.
|
170
|
-
lineinfo: Whether to emit line info.
|
171
|
-
nvvm_options: Dict of NVVM options used when compiling the new library.
|
172
|
-
filename: The source filename that the function is contained in.
|
173
|
-
linenum: The source line that the function is on.
|
174
|
-
max_registers: The max_registers argument for the code library.
|
175
|
-
"""
|
176
|
-
kernel_name = itanium_mangler.prepend_namespace(
|
177
|
-
fndesc.llvm_func_name, ns='cudapy',
|
178
|
-
)
|
179
|
-
library = self.codegen().create_library(f'{codelib.name}_kernel_',
|
180
|
-
entry_name=kernel_name,
|
181
|
-
nvvm_options=nvvm_options,
|
182
|
-
max_registers=max_registers,
|
183
|
-
lto=lto
|
184
|
-
)
|
185
|
-
library.add_linking_library(codelib)
|
186
|
-
wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
|
187
|
-
debug, lineinfo, filename,
|
188
|
-
linenum)
|
189
|
-
return library, wrapper
|
190
|
-
|
191
|
-
def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
|
192
|
-
lineinfo, filename, linenum):
|
193
|
-
"""
|
194
|
-
Generate the kernel wrapper in the given ``library``.
|
195
|
-
The function being wrapped is described by ``fndesc``.
|
196
|
-
The wrapper function is returned.
|
197
|
-
"""
|
198
|
-
|
199
|
-
argtypes = fndesc.argtypes
|
200
|
-
arginfo = self.get_arg_packer(argtypes)
|
201
|
-
argtys = list(arginfo.argument_types)
|
202
|
-
wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
|
203
|
-
wrapper_module = self.create_module("cuda.kernel.wrapper")
|
204
|
-
fnty = ir.FunctionType(ir.IntType(32),
|
205
|
-
[self.call_conv.get_return_type(types.pyobject)]
|
206
|
-
+ argtys)
|
207
|
-
func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
|
208
|
-
|
209
|
-
prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
|
210
|
-
wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
|
211
|
-
builder = ir.IRBuilder(wrapfn.append_basic_block(''))
|
212
|
-
|
213
|
-
if debug or lineinfo:
|
214
|
-
directives_only = lineinfo and not debug
|
215
|
-
debuginfo = self.DIBuilder(module=wrapper_module,
|
216
|
-
filepath=filename,
|
217
|
-
cgctx=self,
|
218
|
-
directives_only=directives_only)
|
219
|
-
debuginfo.mark_subprogram(
|
220
|
-
wrapfn, kernel_name, fndesc.args, argtypes, linenum,
|
221
|
-
)
|
222
|
-
debuginfo.mark_location(builder, linenum)
|
223
|
-
|
224
|
-
# Define error handling variable
|
225
|
-
def define_error_gv(postfix):
|
226
|
-
name = wrapfn.name + postfix
|
227
|
-
gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
|
228
|
-
name)
|
229
|
-
gv.initializer = ir.Constant(gv.type.pointee, None)
|
230
|
-
return gv
|
231
|
-
|
232
|
-
gv_exc = define_error_gv("__errcode__")
|
233
|
-
gv_tid = []
|
234
|
-
gv_ctaid = []
|
235
|
-
for i in 'xyz':
|
236
|
-
gv_tid.append(define_error_gv("__tid%s__" % i))
|
237
|
-
gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
|
238
|
-
|
239
|
-
callargs = arginfo.from_arguments(builder, wrapfn.args)
|
240
|
-
status, _ = self.call_conv.call_function(
|
241
|
-
builder, func, types.void, argtypes, callargs)
|
242
|
-
|
243
|
-
if debug:
|
244
|
-
# Check error status
|
245
|
-
with cgutils.if_likely(builder, status.is_ok):
|
246
|
-
builder.ret_void()
|
247
|
-
|
248
|
-
with builder.if_then(builder.not_(status.is_python_exc)):
|
249
|
-
# User exception raised
|
250
|
-
old = ir.Constant(gv_exc.type.pointee, None)
|
251
|
-
|
252
|
-
# Use atomic cmpxchg to prevent rewriting the error status
|
253
|
-
# Only the first error is recorded
|
254
|
-
|
255
|
-
xchg = builder.cmpxchg(gv_exc, old, status.code,
|
256
|
-
'monotonic', 'monotonic')
|
257
|
-
changed = builder.extract_value(xchg, 1)
|
258
|
-
|
259
|
-
# If the xchange is successful, save the thread ID.
|
260
|
-
sreg = nvvmutils.SRegBuilder(builder)
|
261
|
-
with builder.if_then(changed):
|
262
|
-
for dim, ptr, in zip("xyz", gv_tid):
|
263
|
-
val = sreg.tid(dim)
|
264
|
-
builder.store(val, ptr)
|
265
|
-
|
266
|
-
for dim, ptr, in zip("xyz", gv_ctaid):
|
267
|
-
val = sreg.ctaid(dim)
|
268
|
-
builder.store(val, ptr)
|
269
|
-
|
270
|
-
builder.ret_void()
|
271
|
-
|
272
|
-
nvvm.set_cuda_kernel(wrapfn)
|
273
|
-
library.add_ir_module(wrapper_module)
|
274
|
-
if debug or lineinfo:
|
275
|
-
debuginfo.finalize()
|
276
|
-
library.finalize()
|
277
|
-
|
278
|
-
if config.DUMP_LLVM:
|
279
|
-
utils.dump_llvm(fndesc, wrapper_module)
|
280
|
-
|
281
|
-
return library.get_function(wrapfn.name)
|
282
|
-
|
283
153
|
def make_constant_array(self, builder, aryty, arr):
|
284
154
|
"""
|
285
155
|
Unlike the parent version. This returns a a pointer in the constant
|