numba-cuda 0.1.0__tar.gz → 0.3.0__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/PKG-INFO +1 -1
- numba_cuda-0.3.0/numba_cuda/VERSION +1 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadecl.py +5 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/driver.py +16 -14
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudaimpl.py +8 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/dispatcher.py +32 -14
- numba_cuda-0.3.0/numba_cuda/numba/cuda/reshape_funcs.cu +151 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +76 -3
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +73 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda.egg-info/PKG-INFO +1 -1
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda.egg-info/SOURCES.txt +1 -0
- numba_cuda-0.1.0/numba_cuda/VERSION +0 -1
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/LICENSE +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/README.md +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/codegen.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/compiler.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/decorators.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/device_init.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/stubs.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/target.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/pyproject.toml +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/setup.cfg +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/setup.py +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.1.0 → numba_cuda-0.3.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.3.0
|
@@ -508,6 +508,11 @@ class Cuda_atomic_cas(AbstractTemplate):
|
|
508
508
|
return signature(dty, ary, idx, dty, dty)
|
509
509
|
|
510
510
|
|
511
|
+
@register_global(breakpoint)
|
512
|
+
class Cuda_breakpoint(ConcreteTemplate):
|
513
|
+
cases = [signature(types.none)]
|
514
|
+
|
515
|
+
|
511
516
|
@register
|
512
517
|
class Cuda_nanosleep(ConcreteTemplate):
|
513
518
|
key = cuda.nanosleep
|
@@ -42,6 +42,11 @@ from .mappings import FILE_EXTENSION_MAP
|
|
42
42
|
from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
|
43
43
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
44
44
|
|
45
|
+
try:
|
46
|
+
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
47
|
+
except ImportError:
|
48
|
+
NvJitLinker, NvJitLinkError = None, None
|
49
|
+
|
45
50
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
46
51
|
|
47
52
|
if USE_NV_BINDING:
|
@@ -92,20 +97,6 @@ ENABLE_PYNVJITLINK = (
|
|
92
97
|
if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
|
93
98
|
config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
|
94
99
|
|
95
|
-
if ENABLE_PYNVJITLINK:
|
96
|
-
try:
|
97
|
-
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
98
|
-
except ImportError:
|
99
|
-
raise ImportError(
|
100
|
-
"Using pynvjitlink requires the pynvjitlink package to be available"
|
101
|
-
)
|
102
|
-
|
103
|
-
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
104
|
-
raise ValueError(
|
105
|
-
"Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
|
106
|
-
"CUDA_ENABLE_PYNVJITLINK at the same time"
|
107
|
-
)
|
108
|
-
|
109
100
|
|
110
101
|
def make_logger():
|
111
102
|
logger = logging.getLogger(__name__)
|
@@ -3061,6 +3052,17 @@ class PyNvJitLinker(Linker):
|
|
3061
3052
|
lto=False,
|
3062
3053
|
additional_flags=None,
|
3063
3054
|
):
|
3055
|
+
if NvJitLinker is None:
|
3056
|
+
raise ImportError(
|
3057
|
+
"Using pynvjitlink requires the pynvjitlink package to be "
|
3058
|
+
"available"
|
3059
|
+
)
|
3060
|
+
|
3061
|
+
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
3062
|
+
raise ValueError(
|
3063
|
+
"Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
|
3064
|
+
"CUDA_ENABLE_PYNVJITLINK at the same time"
|
3065
|
+
)
|
3064
3066
|
|
3065
3067
|
if cc is None:
|
3066
3068
|
raise RuntimeError("PyNvJitLinker requires CC to be specified")
|
@@ -934,6 +934,14 @@ def ptx_atomic_cas(context, builder, sig, args):
|
|
934
934
|
|
935
935
|
# -----------------------------------------------------------------------------
|
936
936
|
|
937
|
+
|
938
|
+
@lower(breakpoint)
|
939
|
+
def ptx_brkpt(context, builder, sig, args):
|
940
|
+
brkpt = ir.InlineAsm(ir.FunctionType(ir.VoidType(), []),
|
941
|
+
"brkpt;", '', side_effect=True)
|
942
|
+
builder.call(brkpt, ())
|
943
|
+
|
944
|
+
|
937
945
|
@lower(stubs.nanosleep, types.uint32)
|
938
946
|
def ptx_nanosleep(context, builder, sig, args):
|
939
947
|
nanosleep = ir.InlineAsm(ir.FunctionType(ir.VoidType(), [ir.IntType(32)]),
|
@@ -37,6 +37,8 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
|
|
37
37
|
'hrcp', 'hrint',
|
38
38
|
'htrunc', 'hdiv']
|
39
39
|
|
40
|
+
reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
|
41
|
+
|
40
42
|
|
41
43
|
class _Kernel(serialize.ReduceMixin):
|
42
44
|
'''
|
@@ -117,25 +119,43 @@ class _Kernel(serialize.ReduceMixin):
|
|
117
119
|
if not link:
|
118
120
|
link = []
|
119
121
|
|
122
|
+
asm = lib.get_asm_str()
|
123
|
+
|
120
124
|
# A kernel needs cooperative launch if grid_sync is being used.
|
121
|
-
self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
|
125
|
+
self.cooperative = 'cudaCGGetIntrinsicHandle' in asm
|
122
126
|
# We need to link against cudadevrt if grid sync is being used.
|
123
127
|
if self.cooperative:
|
124
128
|
lib.needs_cudadevrt = True
|
125
129
|
|
126
|
-
|
127
|
-
|
130
|
+
def link_to_library_functions(library_functions, library_path,
|
131
|
+
prefix=None):
|
132
|
+
"""
|
133
|
+
Dynamically links to library functions by searching for their names
|
134
|
+
in the specified library and linking to the corresponding source
|
135
|
+
file.
|
136
|
+
"""
|
137
|
+
if prefix is not None:
|
138
|
+
library_functions = [f"{prefix}{fn}" for fn in
|
139
|
+
library_functions]
|
128
140
|
|
129
|
-
|
130
|
-
|
141
|
+
found_functions = [fn for fn in library_functions
|
142
|
+
if f'{fn}' in asm]
|
131
143
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
link.append(functions_cu_path)
|
144
|
+
if found_functions:
|
145
|
+
basedir = os.path.dirname(os.path.abspath(__file__))
|
146
|
+
source_file_path = os.path.join(basedir, library_path)
|
147
|
+
link.append(source_file_path)
|
137
148
|
|
138
|
-
|
149
|
+
return found_functions
|
150
|
+
|
151
|
+
# Link to the helper library functions if needed
|
152
|
+
link_to_library_functions(reshape_funcs, 'reshape_funcs.cu')
|
153
|
+
# Link to the CUDA FP16 math library functions if needed
|
154
|
+
link_to_library_functions(cuda_fp16_math_funcs,
|
155
|
+
'cpp_function_wrappers.cu',
|
156
|
+
'__numba_wrapper_')
|
157
|
+
|
158
|
+
self.maybe_link_nrt(link, tgt_ctx, asm)
|
139
159
|
|
140
160
|
for filepath in link:
|
141
161
|
lib.add_linking_file(filepath)
|
@@ -160,7 +180,7 @@ class _Kernel(serialize.ReduceMixin):
|
|
160
180
|
|
161
181
|
def maybe_link_nrt(self, link, tgt_ctx, asm):
|
162
182
|
if not tgt_ctx.enable_nrt:
|
163
|
-
return link
|
183
|
+
return
|
164
184
|
|
165
185
|
all_nrt = "|".join(self.NRT_functions)
|
166
186
|
pattern = (
|
@@ -175,8 +195,6 @@ class _Kernel(serialize.ReduceMixin):
|
|
175
195
|
nrt_path = os.path.join(basedir, 'runtime', 'nrt.cu')
|
176
196
|
link.append(nrt_path)
|
177
197
|
|
178
|
-
return link
|
179
|
-
|
180
198
|
@property
|
181
199
|
def library(self):
|
182
200
|
return self._codelibrary
|
@@ -0,0 +1,151 @@
|
|
1
|
+
/*
|
2
|
+
* Handle reshaping of zero-sized array.
|
3
|
+
* See numba_attempt_nocopy_reshape() below.
|
4
|
+
*/
|
5
|
+
#define NPY_MAXDIMS 32
|
6
|
+
|
7
|
+
typedef long long int npy_intp;
|
8
|
+
|
9
|
+
extern "C" __device__ int
|
10
|
+
nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
|
11
|
+
npy_intp newnd, const npy_intp *newdims,
|
12
|
+
npy_intp *newstrides, npy_intp itemsize,
|
13
|
+
int is_f_order)
|
14
|
+
{
|
15
|
+
int i;
|
16
|
+
/* Just make the strides vaguely reasonable
|
17
|
+
* (they can have any value in theory).
|
18
|
+
*/
|
19
|
+
for (i = 0; i < newnd; i++)
|
20
|
+
newstrides[i] = itemsize;
|
21
|
+
return 1; /* reshape successful */
|
22
|
+
}
|
23
|
+
|
24
|
+
/*
 * Attempt to reshape an array without copying data.
 *
 * Straight from Numpy's _attempt_nocopy_reshape()
 * (np/core/src/multiarray/shape.c).
 *
 * This function should correctly handle all reshapes, including
 * axes of length 1. Zero strides should work but are untested.
 *
 * Parameters:
 *   nd, dims, strides - rank, shape and strides of the existing array.
 *                       `nd` is assumed not to exceed NPY_MAXDIMS; this is
 *                       not checked here.
 *   newnd, newdims    - rank and shape of the requested view.
 *   newstrides        - output array with room for `newnd` entries.
 *   itemsize          - stride given to the new axes when no old axis
 *                       determines it (and to every axis of an empty array).
 *   is_f_order        - non-zero to combine axes in Fortran order,
 *                       zero for C order.
 *
 * Returns:
 *   0 if a copy is needed (this includes mismatched total sizes);
 *     `newstrides` may have been partially written in that case.
 *   1 if no copy is needed, with `newstrides` filled with appropriate
 *     strides.
 */
extern "C" __device__ int
numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
                             npy_intp newnd, const npy_intp *newdims,
                             npy_intp *newstrides, npy_intp itemsize,
                             int is_f_order)
{
    int oldnd;
    npy_intp olddims[NPY_MAXDIMS];
    npy_intp oldstrides[NPY_MAXDIMS];
    npy_intp np, op, last_stride;
    int oi, oj, ok, ni, nj, nk;

    oldnd = 0;
    /*
     * Remove axes with dimension 1 from the old array. They have no effect
     * but would need special cases since their strides do not matter.
     */
    for (oi = 0; oi < nd; oi++) {
        if (dims[oi] != 1) {
            olddims[oldnd] = dims[oi];
            oldstrides[oldnd] = strides[oi];
            oldnd++;
        }
    }

    /* Compare the total element counts of the new and old shapes. */
    np = 1;
    for (ni = 0; ni < newnd; ni++) {
        np *= newdims[ni];
    }
    op = 1;
    for (oi = 0; oi < oldnd; oi++) {
        op *= olddims[oi];
    }
    if (np != op) {
        /* different total sizes; no hope */
        return 0;
    }

    if (np == 0) {
        /* the Numpy code does not handle 0-sized arrays */
        return nocopy_empty_reshape(nd, dims, strides,
                                    newnd, newdims, newstrides,
                                    itemsize, is_f_order);
    }

    /* oi to oj and ni to nj give the axis ranges currently worked with */
    oi = 0;
    oj = 1;
    ni = 0;
    nj = 1;
    while (ni < newnd && oi < oldnd) {
        np = newdims[ni];
        op = olddims[oi];

        /* Grow whichever side is smaller until both groups match in size. */
        while (np != op) {
            if (np < op) {
                /* Misses trailing 1s, these are handled later */
                np *= newdims[nj++];
            } else {
                op *= olddims[oj++];
            }
        }

        /* Check whether the original axes can be combined */
        for (ok = oi; ok < oj - 1; ok++) {
            if (is_f_order) {
                if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) {
                    /* not contiguous enough */
                    return 0;
                }
            }
            else {
                /* C order */
                if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) {
                    /* not contiguous enough */
                    return 0;
                }
            }
        }

        /* Calculate new strides for all axes currently worked with */
        if (is_f_order) {
            newstrides[ni] = oldstrides[oi];
            for (nk = ni + 1; nk < nj; nk++) {
                newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1];
            }
        }
        else {
            /* C order */
            newstrides[nj - 1] = oldstrides[oj - 1];
            for (nk = nj - 1; nk > ni; nk--) {
                newstrides[nk - 1] = newstrides[nk]*newdims[nk];
            }
        }
        ni = nj++;
        oi = oj++;
    }

    /*
     * Set strides corresponding to trailing 1s of the new shape.
     */
    if (ni >= 1) {
        last_stride = newstrides[ni - 1];
        if (is_f_order) {
            last_stride *= newdims[ni - 1];
        }
    }
    else {
        /*
         * No axis was matched above (the old shape had only length-1 axes,
         * or newnd is 0), so there is no previous axis to derive a stride
         * from. newdims[ni - 1] must not be read here: with ni == 0 that
         * index is out of bounds.
         */
        last_stride = itemsize;
    }
    for (nk = ni; nk < newnd; nk++) {
        newstrides[nk] = last_stride;
    }

    return 1;
}
|
@@ -2,6 +2,18 @@ from numba.cuda.testing import unittest
|
|
2
2
|
from numba.cuda.testing import skip_on_cudasim
|
3
3
|
from numba.cuda.testing import CUDATestCase
|
4
4
|
from numba.cuda.cudadrv.driver import PyNvJitLinker
|
5
|
+
from numba.cuda import get_current_device
|
6
|
+
|
7
|
+
from numba import cuda
|
8
|
+
from numba import config
|
9
|
+
from numba.tests.support import run_in_subprocess, override_config
|
10
|
+
|
11
|
+
try:
|
12
|
+
import pynvjitlink # noqa: F401
|
13
|
+
PYNVJITLINK_INSTALLED = True
|
14
|
+
except ImportError:
|
15
|
+
PYNVJITLINK_INSTALLED = False
|
16
|
+
|
5
17
|
|
6
18
|
import itertools
|
7
19
|
import os
|
@@ -9,9 +21,6 @@ import io
|
|
9
21
|
import contextlib
|
10
22
|
import warnings
|
11
23
|
|
12
|
-
from numba.cuda import get_current_device
|
13
|
-
from numba import cuda
|
14
|
-
from numba import config
|
15
24
|
|
16
25
|
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
|
17
26
|
if TEST_BIN_DIR:
|
@@ -251,5 +260,69 @@ class TestLinker(CUDATestCase):
|
|
251
260
|
pass
|
252
261
|
|
253
262
|
|
263
|
+
@unittest.skipIf(
    not PYNVJITLINK_INSTALLED, reason="Pynvjitlink is not installed"
)
@unittest.skipIf(
    not TEST_BIN_DIR, reason="NUMBA_CUDA_TEST_BIN_DIR is not set"
)
class TestLinkerUsage(CUDATestCase):
    """Test that pynvjitlink can be enabled both by environment variable
    and by modification of config at runtime.

    Each test formats ``src`` and hands it to ``run_in_subprocess`` together
    with an explicit environment. The class is skipped when
    NUMBA_CUDA_TEST_BIN_DIR is unset, because ``src`` only binds
    ``test_device_functions_cubin`` when that variable is present and would
    otherwise stop with a NameError before reaching the linker.
    """

    # Script run in the child process. ``{config}`` is replaced by an
    # optional statement that sets ``config.CUDA_ENABLE_PYNVJITLINK`` before
    # the kernel is declared.
    src = """if 1:
        import os
        from numba import cuda, config

        {config}

        TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
        if TEST_BIN_DIR:
            test_device_functions_cubin = os.path.join(
                TEST_BIN_DIR, "test_device_functions.cubin"
            )

        sig = "uint32(uint32, uint32)"
        add_from_numba = cuda.declare_device("add_from_numba", sig)

        @cuda.jit(link=[test_device_functions_cubin], lto=True)
        def kernel(result):
            result[0] = add_from_numba(1, 2)

        result = cuda.device_array(1)
        kernel[1, 1](result)
        assert result[0] == 3
        """

    # Message expected in the child's failure when pynvjitlink is disabled.
    _disabled_msg = "LTO and additional flags require PyNvJitLinker"

    @staticmethod
    def _env_without_flag():
        """Return a copy of os.environ without NUMBA_CUDA_ENABLE_PYNVJITLINK."""
        env = os.environ.copy()
        env.pop('NUMBA_CUDA_ENABLE_PYNVJITLINK', None)
        return env

    def test_linker_enabled_envvar(self):
        env = os.environ.copy()
        env['NUMBA_CUDA_ENABLE_PYNVJITLINK'] = "1"
        run_in_subprocess(self.src.format(config=""), env=env)

    def test_linker_disabled_envvar(self):
        env = self._env_without_flag()
        with self.assertRaisesRegex(AssertionError, self._disabled_msg):
            # Actual error raised is `ValueError`, but `run_in_subprocess`
            # reraises as AssertionError.
            run_in_subprocess(self.src.format(config=""), env=env)

    def test_linker_enabled_config(self):
        env = self._env_without_flag()
        run_in_subprocess(self.src.format(
            config="config.CUDA_ENABLE_PYNVJITLINK = True"), env=env)

    def test_linker_disabled_config(self):
        env = self._env_without_flag()
        # NOTE(review): override_config changes the config of this process;
        # the child presumably takes its setting from the statement injected
        # into the script below -- confirm whether the override is needed.
        with override_config("CUDA_ENABLE_PYNVJITLINK", False):
            with self.assertRaisesRegex(AssertionError, self._disabled_msg):
                run_in_subprocess(self.src.format(
                    config="config.CUDA_ENABLE_PYNVJITLINK = False"), env=env)
|
325
|
+
|
326
|
+
|
254
327
|
if __name__ == "__main__":
|
255
328
|
unittest.main()
|
@@ -12,6 +12,31 @@ else:
|
|
12
12
|
cuda.pinned_array_like)
|
13
13
|
|
14
14
|
|
15
|
+
def array_reshape1d(arr, newshape, got):
    """Reshape *arr* to *newshape* and copy the 1-D result into *got*."""
    reshaped = arr.reshape(newshape)
    length = reshaped.shape[0]
    for idx in range(length):
        got[idx] = reshaped[idx]
|
19
|
+
|
20
|
+
|
21
|
+
def array_reshape2d(arr, newshape, got):
    """Reshape *arr* to *newshape* and copy the 2-D result into *got*."""
    reshaped = arr.reshape(newshape)
    rows = reshaped.shape[0]
    cols = reshaped.shape[1]
    for r in range(rows):
        for c in range(cols):
            got[r, c] = reshaped[r, c]
|
26
|
+
|
27
|
+
|
28
|
+
def array_reshape3d(arr, newshape, got):
    """Reshape *arr* to *newshape* and copy the 3-D result into *got*."""
    reshaped = arr.reshape(newshape)
    planes = reshaped.shape[0]
    rows = reshaped.shape[1]
    cols = reshaped.shape[2]
    for p in range(planes):
        for r in range(rows):
            for c in range(cols):
                got[p, r, c] = reshaped[p, r, c]
|
34
|
+
|
35
|
+
|
36
|
+
def array_reshape(arr, newshape):
    """Return *arr* reshaped to *newshape* via its own ``reshape`` method."""
    reshaped = arr.reshape(newshape)
    return reshaped
|
38
|
+
|
39
|
+
|
15
40
|
class TestCudaArray(CUDATestCase):
|
16
41
|
def test_gpu_array_zero_length(self):
|
17
42
|
x = np.arange(0)
|
@@ -255,6 +280,54 @@ class TestCudaArray(CUDATestCase):
|
|
255
280
|
|
256
281
|
self.assertEqual(1, len(func.overloads))
|
257
282
|
|
283
|
+
def test_array_reshape(self):
    """Run reshape inside CUDA kernels and compare with the host result.

    Covers C-contiguous inputs (with and without length-1 axes), shapes
    containing -1, and 0-sized arrays.
    """

    def compare_with_host(pyfunc, kernelfunc, arr, shape):
        # The kernel copies the reshaped array into `got`, which must then
        # equal what `pyfunc` returns on the host.
        expected = pyfunc(arr, shape)
        got = np.zeros(expected.shape, dtype=arr.dtype)
        cuda.jit(kernelfunc)[1, 1](arr, shape, got)
        self.assertPreciseEqual(got, expected)

    def run_and_check_shape(kernelfunc, arr, shape, expected_shape):
        # No host reference is computed here; the kernel is launched and
        # the output buffer's shape and size are checked.
        got = np.zeros(expected_shape, dtype=arr.dtype)
        cuda.jit(kernelfunc)[1, 1](arr, shape, got)
        self.assertEqual(got.shape, expected_shape)
        self.assertEqual(got.size, arr.size)

    # C-contiguous
    contiguous_cases = (
        (array_reshape1d, (24,)),
        (array_reshape2d, (4, 6)),
        (array_reshape2d, (8, 3)),
        (array_reshape3d, (8, 1, 3)),
    )
    contiguous_inputs = (
        np.arange(24),
        np.arange(24).reshape((1, 8, 1, 1, 3, 1)),
    )
    for source in contiguous_inputs:
        for kernelfunc, shape in contiguous_cases:
            compare_with_host(array_reshape, kernelfunc, source, shape)

    # Test negative shape value
    square = np.arange(25).reshape(5, 5)
    negative_cases = (
        (array_reshape1d, -1),
        (array_reshape1d, (-1,)),
        (array_reshape2d, (-1, 5)),
        (array_reshape3d, (5, -1, 5)),
        (array_reshape3d, (5, 5, -1)),
    )
    for kernelfunc, shape in negative_cases:
        compare_with_host(array_reshape, kernelfunc, square, shape)

    # 0-sized arrays
    empty = np.array([])
    for kernelfunc, shape in (
        (array_reshape1d, 0),
        (array_reshape1d, (0,)),
        (array_reshape3d, (1, 0, 2)),
    ):
        compare_with_host(array_reshape, kernelfunc, empty, shape)
    for kernelfunc, shape, expected_shape in (
        (array_reshape2d, (0, -1), (0, 0)),
        (array_reshape2d, (4, -1), (4, 0)),
        (array_reshape3d, (-1, 0, 4), (0, 0, 4)),
    ):
        run_and_check_shape(kernelfunc, empty, shape, expected_shape)
|
330
|
+
|
258
331
|
|
259
332
|
if __name__ == '__main__':
|
260
333
|
unittest.main()
|
@@ -43,6 +43,7 @@ numba_cuda/numba/cuda/models.py
|
|
43
43
|
numba_cuda/numba/cuda/nvvmutils.py
|
44
44
|
numba_cuda/numba/cuda/printimpl.py
|
45
45
|
numba_cuda/numba/cuda/random.py
|
46
|
+
numba_cuda/numba/cuda/reshape_funcs.cu
|
46
47
|
numba_cuda/numba/cuda/simulator_init.py
|
47
48
|
numba_cuda/numba/cuda/stubs.py
|
48
49
|
numba_cuda/numba/cuda/target.py
|
@@ -1 +0,0 @@
|
|
1
|
-
0.1.0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|