numba-cuda 0.0.16__tar.gz → 0.0.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/PKG-INFO +1 -1
- numba_cuda-0.0.18/numba_cuda/VERSION +1 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/codegen.py +15 -3
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/driver.py +209 -47
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda-0.0.18/numba_cuda/numba/cuda/cudadrv/linkable_code.py +63 -0
- numba_cuda-0.0.18/numba_cuda/numba/cuda/cudadrv/mappings.py +24 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/device_init.py +3 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/dispatcher.py +2 -2
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/intrinsics.py +6 -1
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/printimpl.py +11 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/target.py +4 -2
- numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +199 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +44 -4
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +51 -0
- numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +163 -0
- numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +19 -0
- numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +3 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/PKG-INFO +1 -1
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/SOURCES.txt +7 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/pyproject.toml +1 -1
- numba_cuda-0.0.16/numba_cuda/VERSION +0 -1
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/LICENSE +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/README.md +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/compiler.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadecl.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/decorators.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/stubs.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/setup.cfg +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/setup.py +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.0.16 → numba_cuda-0.0.18}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.0.18
|
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
59
59
|
get_cufunc), which may be of different compute capabilities.
|
60
60
|
"""
|
61
61
|
|
62
|
-
def __init__(
|
63
|
-
|
62
|
+
def __init__(
|
63
|
+
self,
|
64
|
+
codegen,
|
65
|
+
name,
|
66
|
+
entry_name=None,
|
67
|
+
max_registers=None,
|
68
|
+
lto=False,
|
69
|
+
nvvm_options=None
|
70
|
+
):
|
64
71
|
"""
|
65
72
|
codegen:
|
66
73
|
Codegen object.
|
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
71
78
|
kernel and not a device function.
|
72
79
|
max_registers:
|
73
80
|
The maximum register usage to aim for when linking.
|
81
|
+
lto:
|
82
|
+
Whether to enable link-time optimization.
|
74
83
|
nvvm_options:
|
75
84
|
Dict of options to pass to NVVM.
|
76
85
|
"""
|
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
103
112
|
self._cufunc_cache = {}
|
104
113
|
|
105
114
|
self._max_registers = max_registers
|
115
|
+
self._lto = lto
|
106
116
|
if nvvm_options is None:
|
107
117
|
nvvm_options = {}
|
108
118
|
self._nvvm_options = nvvm_options
|
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
178
188
|
if cubin:
|
179
189
|
return cubin
|
180
190
|
|
181
|
-
linker = driver.Linker.new(
|
191
|
+
linker = driver.Linker.new(
|
192
|
+
max_registers=self._max_registers, cc=cc, lto=self._lto
|
193
|
+
)
|
182
194
|
|
183
195
|
if linker.lto:
|
184
196
|
ltoir = self.get_ltoir(cc=cc)
|
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
|
|
10
10
|
system to freeze in some cases.
|
11
11
|
|
12
12
|
"""
|
13
|
-
|
14
13
|
import sys
|
15
14
|
import os
|
16
15
|
import ctypes
|
@@ -19,6 +18,7 @@ import functools
|
|
19
18
|
import warnings
|
20
19
|
import logging
|
21
20
|
import threading
|
21
|
+
import traceback
|
22
22
|
import asyncio
|
23
23
|
import pathlib
|
24
24
|
from itertools import product
|
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
|
|
35
35
|
from .error import CudaSupportError, CudaDriverError
|
36
36
|
from .drvapi import API_PROTOTYPES
|
37
37
|
from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
|
38
|
+
from .mappings import FILE_EXTENSION_MAP
|
39
|
+
from .linkable_code import LinkableCode
|
38
40
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
39
41
|
|
40
42
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
|
|
56
58
|
_py_incref.argtypes = [ctypes.py_object]
|
57
59
|
|
58
60
|
|
61
|
+
def _readenv(name, ctor, default):
|
62
|
+
value = os.environ.get(name)
|
63
|
+
if value is None:
|
64
|
+
return default() if callable(default) else default
|
65
|
+
try:
|
66
|
+
if ctor is bool:
|
67
|
+
return value.lower() in {'1', "true"}
|
68
|
+
return ctor(value)
|
69
|
+
except Exception:
|
70
|
+
warnings.warn(
|
71
|
+
f"Environment variable '{name}' is defined but its associated "
|
72
|
+
f"value '{value}' could not be parsed.\n"
|
73
|
+
"The parse failed with exception:\n"
|
74
|
+
f"{traceback.format_exc()}",
|
75
|
+
RuntimeWarning
|
76
|
+
)
|
77
|
+
return default
|
78
|
+
|
79
|
+
|
80
|
+
_MVC_ERROR_MESSAGE = (
|
81
|
+
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
82
|
+
"to be available"
|
83
|
+
)
|
84
|
+
|
85
|
+
ENABLE_PYNVJITLINK = (
|
86
|
+
_readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
|
87
|
+
or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
|
88
|
+
)
|
89
|
+
if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
|
90
|
+
config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
|
91
|
+
|
92
|
+
if ENABLE_PYNVJITLINK:
|
93
|
+
try:
|
94
|
+
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
95
|
+
except ImportError:
|
96
|
+
raise ImportError(
|
97
|
+
"Using pynvjitlink requires the pynvjitlink package to be available"
|
98
|
+
)
|
99
|
+
|
100
|
+
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
101
|
+
raise ValueError(
|
102
|
+
"Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
|
103
|
+
"CUDA_ENABLE_PYNVJITLINK at the same time"
|
104
|
+
)
|
105
|
+
|
106
|
+
|
59
107
|
def make_logger():
|
60
108
|
logger = logging.getLogger(__name__)
|
61
109
|
# is logging configured?
|
@@ -432,7 +480,7 @@ class Driver(object):
|
|
432
480
|
|
433
481
|
def get_version(self):
|
434
482
|
"""
|
435
|
-
Returns the CUDA
|
483
|
+
Returns the CUDA Driver version as a tuple (major, minor).
|
436
484
|
"""
|
437
485
|
if USE_NV_BINDING:
|
438
486
|
version = driver.cuDriverGetVersion()
|
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
|
|
2546
2594
|
extra)
|
2547
2595
|
|
2548
2596
|
|
2549
|
-
if USE_NV_BINDING:
|
2550
|
-
jitty = binding.CUjitInputType
|
2551
|
-
FILE_EXTENSION_MAP = {
|
2552
|
-
'o': jitty.CU_JIT_INPUT_OBJECT,
|
2553
|
-
'ptx': jitty.CU_JIT_INPUT_PTX,
|
2554
|
-
'a': jitty.CU_JIT_INPUT_LIBRARY,
|
2555
|
-
'lib': jitty.CU_JIT_INPUT_LIBRARY,
|
2556
|
-
'cubin': jitty.CU_JIT_INPUT_CUBIN,
|
2557
|
-
'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
|
2558
|
-
}
|
2559
|
-
else:
|
2560
|
-
FILE_EXTENSION_MAP = {
|
2561
|
-
'o': enums.CU_JIT_INPUT_OBJECT,
|
2562
|
-
'ptx': enums.CU_JIT_INPUT_PTX,
|
2563
|
-
'a': enums.CU_JIT_INPUT_LIBRARY,
|
2564
|
-
'lib': enums.CU_JIT_INPUT_LIBRARY,
|
2565
|
-
'cubin': enums.CU_JIT_INPUT_CUBIN,
|
2566
|
-
'fatbin': enums.CU_JIT_INPUT_FATBINARY,
|
2567
|
-
}
|
2568
|
-
|
2569
|
-
|
2570
2597
|
class Linker(metaclass=ABCMeta):
|
2571
2598
|
"""Abstract base class for linkers"""
|
2572
2599
|
|
2573
2600
|
@classmethod
|
2574
|
-
def new(cls,
|
2575
|
-
|
2576
|
-
|
2577
|
-
|
2578
|
-
|
2601
|
+
def new(cls,
|
2602
|
+
max_registers=0,
|
2603
|
+
lineinfo=False,
|
2604
|
+
cc=None,
|
2605
|
+
lto=None,
|
2606
|
+
additional_flags=None
|
2607
|
+
):
|
2608
|
+
|
2609
|
+
driver_ver = driver.get_version()
|
2610
|
+
if (
|
2611
|
+
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
|
2612
|
+
and driver_ver >= (12, 0)
|
2613
|
+
):
|
2614
|
+
raise ValueError(
|
2615
|
+
"Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
|
2616
|
+
)
|
2617
|
+
if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
|
2618
|
+
raise ValueError(
|
2619
|
+
"Enabling pynvjitlink requires CUDA 12."
|
2620
|
+
)
|
2621
|
+
if config.CUDA_ENABLE_PYNVJITLINK:
|
2622
|
+
linker = PyNvJitLinker
|
2623
|
+
|
2624
|
+
elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
2625
|
+
linker = MVCLinker
|
2626
|
+
else:
|
2627
|
+
if USE_NV_BINDING:
|
2628
|
+
linker = CudaPythonLinker
|
2629
|
+
else:
|
2630
|
+
linker = CtypesLinker
|
2631
|
+
|
2632
|
+
if linker is PyNvJitLinker:
|
2633
|
+
return linker(max_registers, lineinfo, cc, lto, additional_flags)
|
2634
|
+
elif additional_flags or lto:
|
2635
|
+
raise ValueError("LTO and additional flags require PyNvJitLinker")
|
2579
2636
|
else:
|
2580
|
-
return
|
2637
|
+
return linker(max_registers, lineinfo, cc)
|
2581
2638
|
|
2582
2639
|
@abstractmethod
|
2583
2640
|
def __init__(self, max_registers, lineinfo, cc):
|
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
|
|
2626
2683
|
cu = f.read()
|
2627
2684
|
self.add_cu(cu, os.path.basename(path))
|
2628
2685
|
|
2629
|
-
def add_file_guess_ext(self,
|
2630
|
-
"""
|
2631
|
-
|
2632
|
-
|
2633
|
-
|
2634
|
-
|
2635
|
-
|
2686
|
+
def add_file_guess_ext(self, path_or_code):
|
2687
|
+
"""
|
2688
|
+
Add a file or LinkableCode object to the link. If a file is
|
2689
|
+
passed, the type will be inferred from the extension. A LinkableCode
|
2690
|
+
object represents a file already in memory.
|
2691
|
+
"""
|
2692
|
+
if isinstance(path_or_code, str):
|
2693
|
+
ext = pathlib.Path(path_or_code).suffix
|
2694
|
+
if ext == '':
|
2695
|
+
raise RuntimeError(
|
2696
|
+
"Don't know how to link file with no extension"
|
2697
|
+
)
|
2698
|
+
elif ext == '.cu':
|
2699
|
+
self.add_cu_file(path_or_code)
|
2700
|
+
else:
|
2701
|
+
kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
|
2702
|
+
if kind is None:
|
2703
|
+
raise RuntimeError(
|
2704
|
+
"Don't know how to link file with extension "
|
2705
|
+
f"{ext}"
|
2706
|
+
)
|
2707
|
+
self.add_file(path_or_code, kind)
|
2708
|
+
return
|
2636
2709
|
else:
|
2637
|
-
|
2638
|
-
if
|
2639
|
-
raise
|
2640
|
-
|
2641
|
-
|
2710
|
+
# Otherwise, we should have been given a LinkableCode object
|
2711
|
+
if not isinstance(path_or_code, LinkableCode):
|
2712
|
+
raise TypeError(
|
2713
|
+
"Expected path to file or a LinkableCode object"
|
2714
|
+
)
|
2715
|
+
|
2716
|
+
if path_or_code.kind == "cu":
|
2717
|
+
self.add_cu(path_or_code.data, path_or_code.name)
|
2718
|
+
else:
|
2719
|
+
self.add_data(
|
2720
|
+
path_or_code.data, path_or_code.kind, path_or_code.name
|
2721
|
+
)
|
2642
2722
|
|
2643
2723
|
@abstractmethod
|
2644
2724
|
def complete(self):
|
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
|
|
2649
2729
|
"""
|
2650
2730
|
|
2651
2731
|
|
2652
|
-
_MVC_ERROR_MESSAGE = (
|
2653
|
-
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
2654
|
-
"to be available"
|
2655
|
-
)
|
2656
|
-
|
2657
|
-
|
2658
2732
|
class MVCLinker(Linker):
|
2659
2733
|
"""
|
2660
2734
|
Linker supporting Minor Version Compatibility, backed by the cubinlinker
|
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
|
|
2930
3004
|
return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
|
2931
3005
|
|
2932
3006
|
|
3007
|
+
class PyNvJitLinker(Linker):
    """
    Linker implementation that delegates to an ``NvJitLinker`` object.

    The command line style options passed to ``NvJitLinker`` are built in
    ``__init__`` and kept on the instance as ``options``; the ``lto`` flag
    is kept as ``lto``.
    """

    def __init__(
        self,
        max_registers=None,
        lineinfo=False,
        cc=None,
        lto=False,
        additional_flags=None,
    ):
        """
        Build the option list and create the underlying ``NvJitLinker``.

        :param max_registers: If truthy, adds ``-maxrregcount=<value>``.
        :param lineinfo: If truthy, adds ``-lineinfo``.
        :param cc: Compute capability as a list or tuple ``(major, minor)``;
                   required.
        :param lto: If truthy, adds ``-lto``.
        :param additional_flags: Optional iterable of extra options appended
                                 after the ones above.
        :raises RuntimeError: If ``cc`` is None.
        :raises TypeError: If ``cc`` is not a list or tuple of length 2.
        """
        if cc is None:
            raise RuntimeError("PyNvJitLinker requires CC to be specified")
        # The message promises a length check, so enforce it here instead of
        # failing later with an IndexError or silently ignoring extra items.
        if not isinstance(cc, (list, tuple)) or len(cc) != 2:
            raise TypeError("`cc` must be a list or tuple of length 2")

        sm_ver = f"{cc[0] * 10 + cc[1]}"
        arch = f"-arch=sm_{sm_ver}"
        options = [arch]
        if max_registers:
            options.append(f"-maxrregcount={max_registers}")
        if lineinfo:
            options.append("-lineinfo")
        if lto:
            options.append("-lto")
        if additional_flags is not None:
            options.extend(additional_flags)

        self._linker = NvJitLinker(*options)
        self.lto = lto
        self.options = options

    @property
    def info_log(self):
        """The ``info_log`` of the underlying linker."""
        return self._linker.info_log

    @property
    def error_log(self):
        """The ``error_log`` of the underlying linker."""
        return self._linker.error_log

    def add_ptx(self, ptx, name="<cudapy-ptx>"):
        """Add PTX to the link under the given name."""
        self._linker.add_ptx(ptx, name)

    def add_fatbin(self, fatbin, name="<external-fatbin>"):
        """Add a fatbin to the link under the given name."""
        self._linker.add_fatbin(fatbin, name)

    def add_ltoir(self, ltoir, name="<external-ltoir>"):
        """Add LTOIR to the link under the given name."""
        self._linker.add_ltoir(ltoir, name)

    def add_object(self, obj, name="<external-object>"):
        """Add an object file to the link under the given name."""
        self._linker.add_object(obj, name)

    def add_file(self, path, kind):
        """
        Read ``path`` as bytes and add its contents to the link as ``kind``.

        The final path component is used as the name of the linked item.

        :raises LinkerError: If the file does not exist, or if adding the
                             data fails (see ``add_data``).
        """
        try:
            with open(path, "rb") as f:
                data = f.read()
        except FileNotFoundError as e:
            raise LinkerError(f"{path} not found") from e

        name = pathlib.Path(path).name
        self.add_data(data, kind, name)

    def add_data(self, data, kind, name):
        """
        Add in-memory ``data`` of the given ``kind`` to the link.

        ``kind`` is compared against the values of FILE_EXTENSION_MAP. The
        literal string ``"ltoir"`` is accepted as well, because that is the
        ``kind`` carried by LTOIR linkable code objects.

        :raises LinkerError: If ``kind`` is not recognised, or if the
                             underlying linker raises NvJitLinkError.
        """
        if kind == FILE_EXTENSION_MAP["cubin"]:
            fn = self._linker.add_cubin
        elif kind == FILE_EXTENSION_MAP["fatbin"]:
            fn = self._linker.add_fatbin
        elif kind == FILE_EXTENSION_MAP["a"]:
            fn = self._linker.add_library
        elif kind == FILE_EXTENSION_MAP["ptx"]:
            return self.add_ptx(data, name)
        elif kind == FILE_EXTENSION_MAP["o"]:
            fn = self._linker.add_object
        elif kind == FILE_EXTENSION_MAP["ltoir"] or kind == "ltoir":
            fn = self._linker.add_ltoir
        else:
            raise LinkerError(f"Don't know how to link {kind}")

        try:
            fn(data, name)
        except NvJitLinkError as e:
            # Carry the original message so the LinkerError is not empty
            raise LinkerError(str(e)) from e

    def complete(self):
        """
        Finish the link and return the linked cubin.

        :raises LinkerError: If the underlying linker raises NvJitLinkError.
        """
        try:
            return self._linker.get_linked_cubin()
        except NvJitLinkError as e:
            # Carry the original message so the LinkerError is not empty
            raise LinkerError(str(e)) from e
|
3094
|
+
|
2933
3095
|
# -----------------------------------------------------------------------------
|
2934
3096
|
|
2935
3097
|
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from .mappings import FILE_EXTENSION_MAP
|
2
|
+
|
3
|
+
|
4
|
+
class LinkableCode:
    """Base class for code held in memory that can be given in the `link`
    list argument of `@cuda.jit` kernels.

    The subclasses in this module supply the `kind` and `default_name` class
    attributes; `default_name` is what `name` falls back to."""

    def __init__(self, data, name=None):
        self._name = name
        self.data = data

    @property
    def name(self):
        """The name given at construction if it is truthy, otherwise the
        class's `default_name`."""
        if self._name:
            return self._name
        return self.default_name
|
15
|
+
|
16
|
+
|
17
|
+
class PTXSource(LinkableCode):
    """In-memory PTX source code to be linked."""

    default_name = "<unnamed-ptx>"
    kind = FILE_EXTENSION_MAP["ptx"]
|
22
|
+
|
23
|
+
|
24
|
+
class CUSource(LinkableCode):
    """In-memory CUDA C/C++ source code to be linked."""

    # The kind is the literal string "cu" rather than a FILE_EXTENSION_MAP
    # entry.
    default_name = "<unnamed-cu>"
    kind = "cu"
|
29
|
+
|
30
|
+
|
31
|
+
class Fatbin(LinkableCode):
    """An in-memory fatbin ELF to be linked."""

    default_name = "<unnamed-fatbin>"
    kind = FILE_EXTENSION_MAP["fatbin"]
|
36
|
+
|
37
|
+
|
38
|
+
class Cubin(LinkableCode):
    """An in-memory cubin ELF to be linked."""

    default_name = "<unnamed-cubin>"
    kind = FILE_EXTENSION_MAP["cubin"]
|
43
|
+
|
44
|
+
|
45
|
+
class Archive(LinkableCode):
    """An in-memory archive of objects to be linked."""

    default_name = "<unnamed-archive>"
    kind = FILE_EXTENSION_MAP["a"]
|
50
|
+
|
51
|
+
|
52
|
+
class Object(LinkableCode):
    """An in-memory object file to be linked."""

    default_name = "<unnamed-object>"
    kind = FILE_EXTENSION_MAP["o"]
|
57
|
+
|
58
|
+
|
59
|
+
class LTOIR(LinkableCode):
    """An in-memory LTOIR file to be linked."""

    # NOTE(review): unlike most sibling classes, the kind here is the literal
    # string "ltoir" and not FILE_EXTENSION_MAP["ltoir"] - confirm that every
    # consumer of `kind` handles the string form.
    default_name = "<unnamed-ltoir>"
    kind = "ltoir"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from numba import config
|
2
|
+
from . import enums
|
3
|
+
# Pick the namespace that provides the CU_JIT_INPUT_* constants: the
# CUjitInputType enumeration from the `cuda` package when
# config.CUDA_USE_NVIDIA_BINDING is set, otherwise the local `enums` module.
if config.CUDA_USE_NVIDIA_BINDING:
    from cuda import cuda
    jitty = cuda.CUjitInputType
    _jit_input_types = jitty
else:
    _jit_input_types = enums

# Maps a file extension (without its leading dot) to the JIT input type used
# when linking a file of that kind.
FILE_EXTENSION_MAP = {
    'o': _jit_input_types.CU_JIT_INPUT_OBJECT,
    'ptx': _jit_input_types.CU_JIT_INPUT_PTX,
    'a': _jit_input_types.CU_JIT_INPUT_LIBRARY,
    'lib': _jit_input_types.CU_JIT_INPUT_LIBRARY,
    'cubin': _jit_input_types.CU_JIT_INPUT_CUBIN,
    'fatbin': _jit_input_types.CU_JIT_INPUT_FATBINARY,
    'ltoir': _jit_input_types.CU_JIT_INPUT_NVVM,
}
|
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
|
|
31
31
|
shfl_xor_sync)
|
32
32
|
|
33
33
|
from .kernels import reduction
|
34
|
+
from numba.cuda.cudadrv.linkable_code import (
|
35
|
+
Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
|
36
|
+
)
|
34
37
|
|
35
38
|
reduce = Reduce = reduction.Reduce
|
36
39
|
|
@@ -46,7 +46,7 @@ class _Kernel(serialize.ReduceMixin):
|
|
46
46
|
@global_compiler_lock
|
47
47
|
def __init__(self, py_func, argtypes, link=None, debug=False,
|
48
48
|
lineinfo=False, inline=False, fastmath=False, extensions=None,
|
49
|
-
max_registers=None, opt=True, device=False):
|
49
|
+
max_registers=None, lto=False, opt=True, device=False):
|
50
50
|
|
51
51
|
if device:
|
52
52
|
raise RuntimeError('Cannot compile a device function as a kernel')
|
@@ -94,7 +94,7 @@ class _Kernel(serialize.ReduceMixin):
|
|
94
94
|
lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
|
95
95
|
debug, lineinfo, nvvm_options,
|
96
96
|
filename, linenum,
|
97
|
-
max_registers)
|
97
|
+
max_registers, lto)
|
98
98
|
|
99
99
|
if not link:
|
100
100
|
link = []
|
@@ -4,7 +4,7 @@ from numba import cuda, types
|
|
4
4
|
from numba.core import cgutils
|
5
5
|
from numba.core.errors import RequireLiteralValue
|
6
6
|
from numba.core.typing import signature
|
7
|
-
from numba.core.extending import overload_attribute
|
7
|
+
from numba.core.extending import overload_attribute, overload_method
|
8
8
|
from numba.cuda import nvvmutils
|
9
9
|
from numba.cuda.extending import intrinsic
|
10
10
|
|
@@ -196,3 +196,8 @@ def syncthreads_or(typingctx, predicate):
|
|
196
196
|
'''
|
197
197
|
fname = 'llvm.nvvm.barrier0.or'
|
198
198
|
return _syncthreads_predicate(typingctx, predicate, fname)
|
199
|
+
|
200
|
+
|
201
|
+
@overload_method(types.Integer, 'bit_count', target='cuda')
def integer_bit_count(i):
    """Overload ``bit_count`` on Integer types for the CUDA target by
    forwarding to ``cuda.popc``."""
    def bit_count_impl(i):
        return cuda.popc(i)

    return bit_count_impl
|
@@ -63,6 +63,17 @@ def dim3_print_impl(ty, context, builder, val):
|
|
63
63
|
return rawfmt, [x, y, z]
|
64
64
|
|
65
65
|
|
66
|
+
@print_item.register(types.Boolean)
def bool_print_impl(ty, context, builder, val):
    """Print-item handler for Boolean values.

    Returns the format string "%s" together with a single argument: the
    string constant for "True" when ``val`` is set, otherwise the one for
    "False".
    """
    text_if_false = context.insert_string_const_addrspace(builder, "False")
    text_if_true = context.insert_string_const_addrspace(builder, "True")

    # Start the slot at "False" and overwrite it only when val holds
    chosen_ptr = cgutils.alloca_once_value(builder, text_if_false)
    with builder.if_then(val):
        builder.store(text_if_true, chosen_ptr)

    return "%s", [builder.load(chosen_ptr)]
|
75
|
+
|
76
|
+
|
66
77
|
@lower(print, types.VarArg(types.Any))
|
67
78
|
def print_varargs(context, builder, sig, args):
|
68
79
|
"""This function is a generic 'print' wrapper for arbitrary types.
|
@@ -148,7 +148,7 @@ class CUDATargetContext(BaseContext):
|
|
148
148
|
|
149
149
|
def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
|
150
150
|
nvvm_options, filename, linenum,
|
151
|
-
max_registers=None):
|
151
|
+
max_registers=None, lto=False):
|
152
152
|
"""
|
153
153
|
Adapt a code library ``codelib`` with the numba compiled CUDA kernel
|
154
154
|
with name ``fname`` and arguments ``argtypes`` for NVVM.
|
@@ -175,7 +175,9 @@ class CUDATargetContext(BaseContext):
|
|
175
175
|
library = self.codegen().create_library(f'{codelib.name}_kernel_',
|
176
176
|
entry_name=kernel_name,
|
177
177
|
nvvm_options=nvvm_options,
|
178
|
-
max_registers=max_registers
|
178
|
+
max_registers=max_registers,
|
179
|
+
lto=lto
|
180
|
+
)
|
179
181
|
library.add_linking_library(codelib)
|
180
182
|
wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
|
181
183
|
debug, lineinfo, filename,
|