numba-cuda 0.9.0__tar.gz → 0.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/PKG-INFO +1 -1
- numba_cuda-0.10.1/numba_cuda/VERSION +1 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/compiler.py +35 -3
- numba_cuda-0.10.1/numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cuda_paths.py +2 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadecl.py +0 -42
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +11 -2
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +10 -3
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudaimpl.py +0 -63
- numba_cuda-0.10.1/numba_cuda/numba/cuda/debuginfo.py +136 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/decorators.py +27 -1
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/device_init.py +4 -5
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/dispatcher.py +4 -3
- numba_cuda-0.10.1/numba_cuda/numba/cuda/extending.py +61 -0
- numba_cuda-0.10.1/numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda-0.10.1/numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- {numba_cuda-0.9.0/numba_cuda/numba/cuda → numba_cuda-0.10.1/numba_cuda/numba/cuda/include/11}/cuda_fp16.h +550 -387
- {numba_cuda-0.9.0/numba_cuda/numba/cuda → numba_cuda-0.10.1/numba_cuda/numba/cuda/include/11}/cuda_fp16.hpp +465 -316
- numba_cuda-0.10.1/numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda-0.10.1/numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda-0.10.1/numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda-0.10.1/numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda-0.10.1/numba_cuda/numba/cuda/intrinsic_wrapper.py +38 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/intrinsics.py +172 -1
- numba_cuda-0.10.1/numba_cuda/numba/cuda/lowering.py +43 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/stubs.py +0 -11
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/target.py +28 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -2
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +1 -1
- numba_cuda-0.10.1/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +1 -1
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +46 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +18 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -2
- numba_cuda-0.10.1/numba_cuda/numba/cuda/tests/cudapy/test_inline.py +156 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +50 -5
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/vector_types.py +3 -1
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/vectorizers.py +1 -1
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/PKG-INFO +1 -1
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/SOURCES.txt +12 -2
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/pyproject.toml +1 -0
- numba_cuda-0.9.0/numba_cuda/VERSION +0 -1
- numba_cuda-0.9.0/numba_cuda/numba/cuda/debuginfo.py +0 -46
- numba_cuda-0.9.0/numba_cuda/numba/cuda/extending.py +0 -7
- numba_cuda-0.9.0/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -77
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/LICENSE +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/README.md +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/api.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cg.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/codegen.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/locks.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/support.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/utils.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/setup.cfg +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/setup.py +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.9.0 → numba_cuda-0.10.1}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.10.1
|
@@ -40,6 +40,7 @@ from numba.cuda.api import get_current_device
|
|
40
40
|
from numba.cuda.cudadrv import nvvm
|
41
41
|
from numba.cuda.descriptor import cuda_target
|
42
42
|
from numba.cuda.target import CUDACABICallConv
|
43
|
+
from numba.cuda import lowering
|
43
44
|
|
44
45
|
|
45
46
|
def _nvvm_options_type(x):
|
@@ -163,6 +164,18 @@ class CreateLibrary(LoweringPass):
|
|
163
164
|
return True
|
164
165
|
|
165
166
|
|
167
|
+
@register_pass(mutates_CFG=True, analysis_only=False)
|
168
|
+
class CUDANativeLowering(NativeLowering):
|
169
|
+
"""Lowering pass for a CUDA native function IR described solely in terms of
|
170
|
+
Numba's standard `numba.core.ir` nodes."""
|
171
|
+
|
172
|
+
_name = "cuda_native_lowering"
|
173
|
+
|
174
|
+
@property
|
175
|
+
def lowering_class(self):
|
176
|
+
return lowering.CUDALower
|
177
|
+
|
178
|
+
|
166
179
|
class CUDABytecodeInterpreter(Interpreter):
|
167
180
|
# Based on the superclass implementation, but names the resulting variable
|
168
181
|
# "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
|
@@ -251,7 +264,7 @@ class CUDACompiler(CompilerBase):
|
|
251
264
|
|
252
265
|
# lower
|
253
266
|
pm.add_pass(CreateLibrary, "create library")
|
254
|
-
pm.add_pass(
|
267
|
+
pm.add_pass(CUDANativeLowering, "cuda native lowering")
|
255
268
|
pm.add_pass(CUDABackend, "cuda backend")
|
256
269
|
|
257
270
|
pm.finalize()
|
@@ -265,7 +278,7 @@ def compile_cuda(
|
|
265
278
|
args,
|
266
279
|
debug=False,
|
267
280
|
lineinfo=False,
|
268
|
-
|
281
|
+
forceinline=False,
|
269
282
|
fastmath=False,
|
270
283
|
nvvm_options=None,
|
271
284
|
cc=None,
|
@@ -303,7 +316,7 @@ def compile_cuda(
|
|
303
316
|
else:
|
304
317
|
flags.error_model = "numpy"
|
305
318
|
|
306
|
-
if
|
319
|
+
if forceinline:
|
307
320
|
flags.forceinline = True
|
308
321
|
if fastmath:
|
309
322
|
flags.fastmath = True
|
@@ -561,6 +574,7 @@ def compile(
|
|
561
574
|
abi="c",
|
562
575
|
abi_info=None,
|
563
576
|
output="ptx",
|
577
|
+
forceinline=False,
|
564
578
|
):
|
565
579
|
"""Compile a Python function to PTX or LTO-IR for a given set of argument
|
566
580
|
types.
|
@@ -601,6 +615,11 @@ def compile(
|
|
601
615
|
:type abi_info: dict
|
602
616
|
:param output: Type of output to generate, either ``"ptx"`` or ``"ltoir"``.
|
603
617
|
:type output: str
|
618
|
+
:param forceinline: Enables inlining at the NVVM IR level when set to
|
619
|
+
``True``. This is accomplished by adding the
|
620
|
+
``alwaysinline`` function attribute to the function
|
621
|
+
definition. This is only valid when the output is
|
622
|
+
``"ltoir"``.
|
604
623
|
:return: (code, resty): The compiled code and inferred return type
|
605
624
|
:rtype: tuple
|
606
625
|
"""
|
@@ -613,6 +632,12 @@ def compile(
|
|
613
632
|
if output not in ("ptx", "ltoir"):
|
614
633
|
raise NotImplementedError(f"Unsupported output type: {output}")
|
615
634
|
|
635
|
+
if forceinline and not device:
|
636
|
+
raise ValueError("Cannot force-inline kernels")
|
637
|
+
|
638
|
+
if forceinline and output != "ltoir":
|
639
|
+
raise ValueError("Can only designate forced inlining in LTO-IR")
|
640
|
+
|
616
641
|
debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
|
617
642
|
opt = (config.OPT != 0) if opt is None else opt
|
618
643
|
|
@@ -647,6 +672,7 @@ def compile(
|
|
647
672
|
fastmath=fastmath,
|
648
673
|
nvvm_options=nvvm_options,
|
649
674
|
cc=cc,
|
675
|
+
forceinline=forceinline,
|
650
676
|
)
|
651
677
|
resty = cres.signature.return_type
|
652
678
|
|
@@ -686,6 +712,7 @@ def compile_for_current_device(
|
|
686
712
|
abi="c",
|
687
713
|
abi_info=None,
|
688
714
|
output="ptx",
|
715
|
+
forceinline=False,
|
689
716
|
):
|
690
717
|
"""Compile a Python function to PTX or LTO-IR for a given signature for the
|
691
718
|
current device's compute capability. This calls :func:`compile` with an
|
@@ -703,6 +730,7 @@ def compile_for_current_device(
|
|
703
730
|
abi=abi,
|
704
731
|
abi_info=abi_info,
|
705
732
|
output=output,
|
733
|
+
forceinline=forceinline,
|
706
734
|
)
|
707
735
|
|
708
736
|
|
@@ -717,6 +745,7 @@ def compile_ptx(
|
|
717
745
|
opt=None,
|
718
746
|
abi="numba",
|
719
747
|
abi_info=None,
|
748
|
+
forceinline=False,
|
720
749
|
):
|
721
750
|
"""Compile a Python function to PTX for a given signature. See
|
722
751
|
:func:`compile`. The defaults for this function are to compile a kernel
|
@@ -734,6 +763,7 @@ def compile_ptx(
|
|
734
763
|
abi=abi,
|
735
764
|
abi_info=abi_info,
|
736
765
|
output="ptx",
|
766
|
+
forceinline=forceinline,
|
737
767
|
)
|
738
768
|
|
739
769
|
|
@@ -747,6 +777,7 @@ def compile_ptx_for_current_device(
|
|
747
777
|
opt=None,
|
748
778
|
abi="numba",
|
749
779
|
abi_info=None,
|
780
|
+
forceinline=False,
|
750
781
|
):
|
751
782
|
"""Compile a Python function to PTX for a given signature for the current
|
752
783
|
device's compute capability. See :func:`compile_ptx`."""
|
@@ -762,6 +793,7 @@ def compile_ptx_for_current_device(
|
|
762
793
|
opt=opt,
|
763
794
|
abi=abi,
|
764
795
|
abi_info=abi_info,
|
796
|
+
forceinline=forceinline,
|
765
797
|
)
|
766
798
|
|
767
799
|
|