numba-cuda 0.10.1__tar.gz → 0.12.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/PKG-INFO +1 -1
- numba_cuda-0.12.1/numba_cuda/VERSION +1 -0
- {numba_cuda-0.10.1/numba_cuda/numba/cuda → numba_cuda-0.12.1/numba_cuda/numba/cuda/_internal}/cuda_bf16.py +1 -1
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/api.py +13 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/bf16.py +112 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cg.py +2 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/codegen.py +77 -2
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/compiler.py +22 -16
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadecl.py +21 -6
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/driver.py +107 -20
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +10 -2
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +23 -1
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudaimpl.py +103 -11
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/debuginfo.py +27 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/decorators.py +7 -2
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/dispatcher.py +25 -65
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/runtime/nrt.cu +2 -17
- numba_cuda-0.12.1/numba_cuda/numba/cuda/runtime/nrt.cuh +41 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/runtime/nrt.py +13 -1
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/stubs.py +23 -11
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/target.py +10 -1
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -12
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +33 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +236 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +55 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +49 -23
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +34 -51
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +34 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +17 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +304 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/data/cta_barrier.cu +23 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/data/include/add.cuh +3 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +3 -0
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +9 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +48 -1
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +122 -3
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +11 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +5 -2
- numba_cuda-0.12.1/numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +7 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +4 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/utils.py +7 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda.egg-info/PKG-INFO +1 -1
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda.egg-info/SOURCES.txt +11 -1
- numba_cuda-0.10.1/numba_cuda/VERSION +0 -1
- numba_cuda-0.10.1/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -164
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/LICENSE +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/README.md +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/_version.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/api_util.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/args.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/cudamath.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/descriptor.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/device_init.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/errors.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/extending.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/12/cuda_bf16.h +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/12/cuda_fp16.h +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/initialize.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/intrinsics.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/libdevice.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/locks.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/lowering.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/mathimpl.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/models.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/printimpl.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/random.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/api.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/simulator_init.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/testing.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_inline.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/support.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/types.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/ufuncs.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/vector_types.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda/numba/cuda/vectorizers.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda.egg-info/dependency_links.txt +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda.egg-info/requires.txt +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/numba_cuda.egg-info/top_level.txt +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/pyproject.toml +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/setup.cfg +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/setup.py +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/site-packages/_numba_cuda_redirector.pth +0 -0
- {numba_cuda-0.10.1 → numba_cuda-0.12.1}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.12.1
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# Generator Information:
|
3
3
|
# Ast_canopy version: 0.3.0
|
4
4
|
# Numbast version: 0.3.0
|
5
|
-
# Generation command: /home/wangm/numbast/numbast/src/numbast/__main__.py --cfg-path configs/cuda_bf16.yml --output-dir numba_cuda/numba/cuda/
|
5
|
+
# Generation command: /home/wangm/numbast/numbast/src/numbast/__main__.py --cfg-path configs/cuda_bf16.yml --output-dir numba_cuda/numba/cuda/_internal
|
6
6
|
# Static binding generator parameters: {'cfg_path': 'configs/cuda_bf16.yml', 'output_dir': 'numba_cuda/numba/cuda/', 'entry_point': None, 'retain': None, 'types': None, 'datamodels': None, 'compute_capability': None, 'run_ruff_format': True}
|
7
7
|
# Config file path (relative to the path of the generated binding): ../../../../configs/cuda_bf16.yml
|
8
8
|
# Cudatoolkit version: (12, 8)
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
10
10
|
from .cudadrv import devicearray, devices, driver
|
11
11
|
from numba.core import config
|
12
12
|
from numba.cuda.api_util import prepare_shape_strides_dtype
|
13
|
+
from numba.cuda.cudadrv.runtime import get_version
|
13
14
|
|
14
15
|
# NDarray device helper
|
15
16
|
|
@@ -95,6 +96,18 @@ def is_float16_supported():
|
|
95
96
|
return True
|
96
97
|
|
97
98
|
|
99
|
+
def is_bfloat16_supported():
|
100
|
+
"""Whether bfloat16 are supported.
|
101
|
+
|
102
|
+
bfloat16 are only supported on devices with compute capability >= 8.0 and cuda version >= 12.0
|
103
|
+
"""
|
104
|
+
cuda_version = get_version()
|
105
|
+
return current_context().device.supports_bfloat16 and cuda_version >= (
|
106
|
+
12,
|
107
|
+
0,
|
108
|
+
)
|
109
|
+
|
110
|
+
|
98
111
|
@require_context
|
99
112
|
def to_device(obj, stream=0, copy=True, to=None):
|
100
113
|
"""to_device(obj, stream=0, copy=True, to=None)
|
@@ -0,0 +1,112 @@
|
|
1
|
+
from numba.cuda._internal.cuda_bf16 import (
|
2
|
+
_type_class___nv_bfloat16,
|
3
|
+
nv_bfloat16 as bfloat16,
|
4
|
+
htrunc,
|
5
|
+
hceil,
|
6
|
+
hfloor,
|
7
|
+
hrint,
|
8
|
+
hsqrt,
|
9
|
+
hrsqrt,
|
10
|
+
hrcp,
|
11
|
+
hlog,
|
12
|
+
hlog2,
|
13
|
+
hlog10,
|
14
|
+
hcos,
|
15
|
+
hsin,
|
16
|
+
hexp,
|
17
|
+
hexp2,
|
18
|
+
hexp10,
|
19
|
+
htanh,
|
20
|
+
htanh_approx,
|
21
|
+
)
|
22
|
+
from numba.extending import overload
|
23
|
+
|
24
|
+
import math
|
25
|
+
|
26
|
+
|
27
|
+
def _make_unary(a, func):
|
28
|
+
if isinstance(a, _type_class___nv_bfloat16):
|
29
|
+
return lambda a: func(a)
|
30
|
+
|
31
|
+
|
32
|
+
# Bind low++ bindings to math APIs
|
33
|
+
@overload(math.trunc, target="cuda")
|
34
|
+
def trunc_ol(a):
|
35
|
+
return _make_unary(a, htrunc)
|
36
|
+
|
37
|
+
|
38
|
+
@overload(math.ceil, target="cuda")
|
39
|
+
def ceil_ol(a):
|
40
|
+
return _make_unary(a, hceil)
|
41
|
+
|
42
|
+
|
43
|
+
@overload(math.floor, target="cuda")
|
44
|
+
def floor_ol(a):
|
45
|
+
return _make_unary(a, hfloor)
|
46
|
+
|
47
|
+
|
48
|
+
@overload(math.sqrt, target="cuda")
|
49
|
+
def sqrt_ol(a):
|
50
|
+
return _make_unary(a, hsqrt)
|
51
|
+
|
52
|
+
|
53
|
+
@overload(math.log, target="cuda")
|
54
|
+
def log_ol(a):
|
55
|
+
return _make_unary(a, hlog)
|
56
|
+
|
57
|
+
|
58
|
+
@overload(math.log10, target="cuda")
|
59
|
+
def log10_ol(a):
|
60
|
+
return _make_unary(a, hlog10)
|
61
|
+
|
62
|
+
|
63
|
+
@overload(math.cos, target="cuda")
|
64
|
+
def cos_ol(a):
|
65
|
+
return _make_unary(a, hcos)
|
66
|
+
|
67
|
+
|
68
|
+
@overload(math.sin, target="cuda")
|
69
|
+
def sin_ol(a):
|
70
|
+
return _make_unary(a, hsin)
|
71
|
+
|
72
|
+
|
73
|
+
@overload(math.tanh, target="cuda")
|
74
|
+
def tanh_ol(a):
|
75
|
+
return _make_unary(a, htanh)
|
76
|
+
|
77
|
+
|
78
|
+
@overload(math.exp, target="cuda")
|
79
|
+
def exp_ol(a):
|
80
|
+
return _make_unary(a, hexp)
|
81
|
+
|
82
|
+
|
83
|
+
try:
|
84
|
+
from math import exp2
|
85
|
+
|
86
|
+
@overload(exp2, target="cuda")
|
87
|
+
def exp2_ol(a):
|
88
|
+
return _make_unary(a, hexp2)
|
89
|
+
except ImportError:
|
90
|
+
pass
|
91
|
+
|
92
|
+
|
93
|
+
__all__ = [
|
94
|
+
"bfloat16",
|
95
|
+
"htrunc",
|
96
|
+
"hceil",
|
97
|
+
"hfloor",
|
98
|
+
"hrint",
|
99
|
+
"hsqrt",
|
100
|
+
"hrsqrt",
|
101
|
+
"hrcp",
|
102
|
+
"hlog",
|
103
|
+
"hlog2",
|
104
|
+
"hlog10",
|
105
|
+
"hcos",
|
106
|
+
"hsin",
|
107
|
+
"htanh",
|
108
|
+
"htanh_approx",
|
109
|
+
"hexp",
|
110
|
+
"hexp2",
|
111
|
+
"hexp10",
|
112
|
+
]
|
@@ -23,6 +23,7 @@ def _this_grid(typingctx):
|
|
23
23
|
sig = signature(grid_group)
|
24
24
|
|
25
25
|
def codegen(context, builder, sig, args):
|
26
|
+
context.active_code_library.use_cooperative = True
|
26
27
|
one = context.get_constant(types.int32, 1)
|
27
28
|
mod = builder.module
|
28
29
|
return builder.call(
|
@@ -45,6 +46,7 @@ def _grid_group_sync(typingctx, group):
|
|
45
46
|
sig = signature(types.int32, group)
|
46
47
|
|
47
48
|
def codegen(context, builder, sig, args):
|
49
|
+
context.active_code_library.use_cooperative = True
|
48
50
|
flags = context.get_constant(types.int32, 0)
|
49
51
|
mod = builder.module
|
50
52
|
return builder.call(
|
@@ -5,6 +5,7 @@ from numba.core.codegen import Codegen, CodeLibrary
|
|
5
5
|
from .cudadrv import devices, driver, nvvm, runtime
|
6
6
|
from numba.cuda.cudadrv.libs import get_cudalib
|
7
7
|
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
8
|
+
from numba.cuda.runtime.nrt import NRT_LIBRARY
|
8
9
|
|
9
10
|
import os
|
10
11
|
import subprocess
|
@@ -57,6 +58,59 @@ def disassemble_cubin_for_cfg(cubin):
|
|
57
58
|
return run_nvdisasm(cubin, flags)
|
58
59
|
|
59
60
|
|
61
|
+
class ExternalCodeLibrary(CodeLibrary):
|
62
|
+
"""Holds code produced externally, for linking with generated code."""
|
63
|
+
|
64
|
+
def __init__(self, codegen, name):
|
65
|
+
super().__init__(codegen, name)
|
66
|
+
# Files to link
|
67
|
+
self._linking_files = set()
|
68
|
+
# Setup and teardown functions for the module.
|
69
|
+
# The order is determined by the order they are added to the codelib.
|
70
|
+
self._setup_functions = []
|
71
|
+
self._teardown_functions = []
|
72
|
+
|
73
|
+
self.use_cooperative = False
|
74
|
+
|
75
|
+
@property
|
76
|
+
def modules(self):
|
77
|
+
# There are no LLVM IR modules in an ExternalCodeLibrary
|
78
|
+
return set()
|
79
|
+
|
80
|
+
def add_linking_file(self, path_or_obj):
|
81
|
+
# Adding new files after finalization is prohibited, in case the list
|
82
|
+
# of libraries has already been added to another code library; the
|
83
|
+
# newly-added files would be omitted from their linking process.
|
84
|
+
self._raise_if_finalized()
|
85
|
+
|
86
|
+
if isinstance(path_or_obj, LinkableCode):
|
87
|
+
if path_or_obj.setup_callback:
|
88
|
+
self._setup_functions.append(path_or_obj.setup_callback)
|
89
|
+
if path_or_obj.teardown_callback:
|
90
|
+
self._teardown_functions.append(path_or_obj.teardown_callback)
|
91
|
+
|
92
|
+
self._linking_files.add(path_or_obj)
|
93
|
+
|
94
|
+
def add_ir_module(self, module):
|
95
|
+
raise NotImplementedError("Cannot add LLVM IR to external code")
|
96
|
+
|
97
|
+
def add_linking_library(self, library):
|
98
|
+
raise NotImplementedError("Cannot add libraries to external code")
|
99
|
+
|
100
|
+
def finalize(self):
|
101
|
+
self._raise_if_finalized()
|
102
|
+
self._finalized = True
|
103
|
+
|
104
|
+
def get_asm_str(self):
|
105
|
+
raise NotImplementedError("No assembly for external code")
|
106
|
+
|
107
|
+
def get_llvm_str(self):
|
108
|
+
raise NotImplementedError("No LLVM IR for external code")
|
109
|
+
|
110
|
+
def get_function(self, name):
|
111
|
+
raise NotImplementedError("Cannot get function from external code")
|
112
|
+
|
113
|
+
|
60
114
|
class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
61
115
|
"""
|
62
116
|
The CUDACodeLibrary generates PTX, SASS, cubins for multiple different
|
@@ -129,6 +183,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
129
183
|
self._nvvm_options = nvvm_options
|
130
184
|
self._entry_name = entry_name
|
131
185
|
|
186
|
+
self.use_cooperative = False
|
187
|
+
|
132
188
|
@property
|
133
189
|
def llvm_strs(self):
|
134
190
|
if self._llvm_strs is None:
|
@@ -297,6 +353,10 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
297
353
|
self._raise_if_finalized()
|
298
354
|
|
299
355
|
self._linking_libraries.add(library)
|
356
|
+
self._linking_files.update(library._linking_files)
|
357
|
+
self._setup_functions.extend(library._setup_functions)
|
358
|
+
self._teardown_functions.extend(library._teardown_functions)
|
359
|
+
self.use_cooperative |= library.use_cooperative
|
300
360
|
|
301
361
|
def add_linking_file(self, path_or_obj):
|
302
362
|
if isinstance(path_or_obj, LinkableCode):
|
@@ -362,9 +422,17 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
362
422
|
but loaded functions are discarded. They are recreated when needed
|
363
423
|
after deserialization.
|
364
424
|
"""
|
425
|
+
nrt = False
|
365
426
|
if self._linking_files:
|
366
|
-
|
367
|
-
|
427
|
+
if (
|
428
|
+
len(self._linking_files) == 1
|
429
|
+
and NRT_LIBRARY in self._linking_files
|
430
|
+
):
|
431
|
+
nrt = True
|
432
|
+
else:
|
433
|
+
msg = "Cannot pickle CUDACodeLibrary with linking files"
|
434
|
+
raise RuntimeError(msg)
|
435
|
+
|
368
436
|
if not self._finalized:
|
369
437
|
raise RuntimeError("Cannot pickle unfinalized CUDACodeLibrary")
|
370
438
|
return dict(
|
@@ -378,6 +446,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
378
446
|
max_registers=self._max_registers,
|
379
447
|
nvvm_options=self._nvvm_options,
|
380
448
|
needs_cudadevrt=self.needs_cudadevrt,
|
449
|
+
nrt=nrt,
|
450
|
+
use_cooperative=self.use_cooperative,
|
381
451
|
)
|
382
452
|
|
383
453
|
@classmethod
|
@@ -393,6 +463,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
393
463
|
max_registers,
|
394
464
|
nvvm_options,
|
395
465
|
needs_cudadevrt,
|
466
|
+
nrt,
|
467
|
+
use_cooperative,
|
396
468
|
):
|
397
469
|
"""
|
398
470
|
Rebuild an instance.
|
@@ -407,8 +479,11 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
407
479
|
instance._max_registers = max_registers
|
408
480
|
instance._nvvm_options = nvvm_options
|
409
481
|
instance.needs_cudadevrt = needs_cudadevrt
|
482
|
+
instance.use_cooperative = use_cooperative
|
410
483
|
|
411
484
|
instance._finalized = True
|
485
|
+
if nrt:
|
486
|
+
instance._linking_files = {NRT_LIBRARY}
|
412
487
|
|
413
488
|
return instance
|
414
489
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
from llvmlite import ir
|
2
|
-
from numba.core.typing.templates import ConcreteTemplate
|
3
2
|
from numba.core import ir as numba_ir
|
4
3
|
from numba.core import (
|
5
4
|
cgutils,
|
@@ -37,6 +36,7 @@ from numba.core.typed_passes import (
|
|
37
36
|
from warnings import warn
|
38
37
|
from numba.cuda import nvvmutils
|
39
38
|
from numba.cuda.api import get_current_device
|
39
|
+
from numba.cuda.codegen import ExternalCodeLibrary
|
40
40
|
from numba.cuda.cudadrv import nvvm
|
41
41
|
from numba.cuda.descriptor import cuda_target
|
42
42
|
from numba.cuda.target import CUDACABICallConv
|
@@ -797,33 +797,39 @@ def compile_ptx_for_current_device(
|
|
797
797
|
)
|
798
798
|
|
799
799
|
|
800
|
-
def declare_device_function(name, restype, argtypes, link):
|
801
|
-
return declare_device_function_template(name, restype, argtypes, link).key
|
802
|
-
|
803
|
-
|
804
|
-
def declare_device_function_template(name, restype, argtypes, link):
|
800
|
+
def declare_device_function(name, restype, argtypes, link, use_cooperative):
|
805
801
|
from .descriptor import cuda_target
|
806
802
|
|
807
803
|
typingctx = cuda_target.typing_context
|
808
804
|
targetctx = cuda_target.target_context
|
809
805
|
sig = typing.signature(restype, *argtypes)
|
810
|
-
extfn = ExternFunction(name, sig, link)
|
811
806
|
|
812
|
-
|
813
|
-
|
814
|
-
|
807
|
+
# extfn is the descriptor used to call the function from Python code, and
|
808
|
+
# is used as the key for typing and lowering.
|
809
|
+
extfn = ExternFunction(name, sig)
|
815
810
|
|
816
|
-
|
817
|
-
|
818
|
-
)
|
811
|
+
# Typing
|
812
|
+
device_function_template = typing.make_concrete_template(name, extfn, [sig])
|
819
813
|
typingctx.insert_user_function(extfn, device_function_template)
|
820
|
-
|
814
|
+
|
815
|
+
# Lowering
|
816
|
+
lib = ExternalCodeLibrary(f"{name}_externals", targetctx.codegen())
|
817
|
+
for file in link:
|
818
|
+
lib.add_linking_file(file)
|
819
|
+
lib.use_cooperative = use_cooperative
|
820
|
+
|
821
|
+
# ExternalFunctionDescriptor provides a lowering implementation for calling
|
822
|
+
# external functions
|
823
|
+
fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes)
|
824
|
+
targetctx.insert_user_function(extfn, fndesc, libs=(lib,))
|
821
825
|
|
822
826
|
return device_function_template
|
823
827
|
|
824
828
|
|
825
829
|
class ExternFunction:
|
826
|
-
|
830
|
+
"""A descriptor that can be used to call the external function from within
|
831
|
+
a Python kernel."""
|
832
|
+
|
833
|
+
def __init__(self, name, sig):
|
827
834
|
self.name = name
|
828
835
|
self.sig = sig
|
829
|
-
self.link = link
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import operator
|
2
|
-
from numba.core import types
|
2
|
+
from numba.core import errors, types
|
3
3
|
from numba.core.typing.npydecl import (
|
4
4
|
parse_dtype,
|
5
5
|
parse_shape,
|
@@ -21,7 +21,7 @@ from numba.core.typing.templates import (
|
|
21
21
|
from numba.cuda.types import dim3
|
22
22
|
from numba.core.typeconv import Conversion
|
23
23
|
from numba import cuda
|
24
|
-
from numba.cuda.compiler import
|
24
|
+
from numba.cuda.compiler import declare_device_function
|
25
25
|
|
26
26
|
registry = Registry()
|
27
27
|
register = registry.register
|
@@ -33,7 +33,7 @@ register_number_classes(register_global)
|
|
33
33
|
|
34
34
|
class Cuda_array_decl(CallableTemplate):
|
35
35
|
def generic(self):
|
36
|
-
def typer(shape, dtype):
|
36
|
+
def typer(shape, dtype, alignment=None):
|
37
37
|
# Only integer literals and tuples of integer literals are valid
|
38
38
|
# shapes
|
39
39
|
if isinstance(shape, types.Integer):
|
@@ -47,6 +47,16 @@ class Cuda_array_decl(CallableTemplate):
|
|
47
47
|
else:
|
48
48
|
return None
|
49
49
|
|
50
|
+
if alignment is not None:
|
51
|
+
permitted = (types.IntegerLiteral, types.NoneType)
|
52
|
+
if not isinstance(alignment, permitted):
|
53
|
+
msg = "alignment must be a constant integer"
|
54
|
+
raise errors.RequireLiteralValue(msg)
|
55
|
+
|
56
|
+
# N.B. We don't use alignment for typing; it's not part of
|
57
|
+
# types.Array. The value supplied to the array declaration
|
58
|
+
# is handled in the lowering.
|
59
|
+
|
50
60
|
ndim = parse_shape(shape)
|
51
61
|
nb_dtype = parse_dtype(dtype)
|
52
62
|
if nb_dtype is not None and ndim is not None:
|
@@ -412,15 +422,19 @@ _genfp16_binary_operator(operator.itruediv)
|
|
412
422
|
|
413
423
|
def _resolve_wrapped_unary(fname):
|
414
424
|
link = tuple()
|
415
|
-
decl =
|
416
|
-
f"__numba_wrapper_{fname}",
|
425
|
+
decl = declare_device_function(
|
426
|
+
f"__numba_wrapper_{fname}",
|
427
|
+
types.float16,
|
428
|
+
(types.float16,),
|
429
|
+
link,
|
430
|
+
use_cooperative=False,
|
417
431
|
)
|
418
432
|
return types.Function(decl)
|
419
433
|
|
420
434
|
|
421
435
|
def _resolve_wrapped_binary(fname):
|
422
436
|
link = tuple()
|
423
|
-
decl =
|
437
|
+
decl = declare_device_function(
|
424
438
|
f"__numba_wrapper_{fname}",
|
425
439
|
types.float16,
|
426
440
|
(
|
@@ -428,6 +442,7 @@ def _resolve_wrapped_binary(fname):
|
|
428
442
|
types.float16,
|
429
443
|
),
|
430
444
|
link,
|
445
|
+
use_cooperative=False,
|
431
446
|
)
|
432
447
|
return types.Function(decl)
|
433
448
|
|
@@ -49,7 +49,7 @@ from .drvapi import API_PROTOTYPES
|
|
49
49
|
from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
|
50
50
|
from .mappings import FILE_EXTENSION_MAP
|
51
51
|
from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
|
52
|
-
from numba.cuda.utils import _readenv
|
52
|
+
from numba.cuda.utils import _readenv, cached_file_read
|
53
53
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
54
54
|
|
55
55
|
try:
|
@@ -714,6 +714,10 @@ class Device(object):
|
|
714
714
|
def supports_float16(self):
|
715
715
|
return self.compute_capability >= (5, 3)
|
716
716
|
|
717
|
+
@property
|
718
|
+
def supports_bfloat16(self):
|
719
|
+
return self.compute_capability >= (8, 0)
|
720
|
+
|
717
721
|
|
718
722
|
def met_requirement_for_device(device):
|
719
723
|
if device.compute_capability < MIN_REQUIRED_CC:
|
@@ -2797,13 +2801,16 @@ class Linker(metaclass=ABCMeta):
|
|
2797
2801
|
ptx_name = os.path.splitext(name)[0] + ".ptx"
|
2798
2802
|
self.add_ptx(ptx.encode(), ptx_name)
|
2799
2803
|
|
2804
|
+
@abstractmethod
|
2805
|
+
def add_data(self, data, kind, name):
|
2806
|
+
"""Add in-memory data to the link"""
|
2807
|
+
|
2800
2808
|
@abstractmethod
|
2801
2809
|
def add_file(self, path, kind):
|
2802
2810
|
"""Add code from a file to the link"""
|
2803
2811
|
|
2804
2812
|
def add_cu_file(self, path):
|
2805
|
-
|
2806
|
-
cu = f.read()
|
2813
|
+
cu = cached_file_read(path, how="rb")
|
2807
2814
|
self.add_cu(cu, os.path.basename(path))
|
2808
2815
|
|
2809
2816
|
def add_file_guess_ext(self, path_or_code, ignore_nonlto=False):
|
@@ -2948,6 +2955,10 @@ class MVCLinker(Linker):
|
|
2948
2955
|
except CubinLinkerError as e:
|
2949
2956
|
raise LinkerError from e
|
2950
2957
|
|
2958
|
+
def add_data(self, data, kind, name):
|
2959
|
+
msg = "Adding in-memory data unsupported in the MVC linker"
|
2960
|
+
raise LinkerError(msg)
|
2961
|
+
|
2951
2962
|
def add_file(self, path, kind):
|
2952
2963
|
try:
|
2953
2964
|
from cubinlinker import CubinLinkerError
|
@@ -2955,8 +2966,7 @@ class MVCLinker(Linker):
|
|
2955
2966
|
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
2956
2967
|
|
2957
2968
|
try:
|
2958
|
-
|
2959
|
-
data = f.read()
|
2969
|
+
data = cached_file_read(path, how="rb")
|
2960
2970
|
except FileNotFoundError:
|
2961
2971
|
raise LinkerError(f"{path} not found")
|
2962
2972
|
|
@@ -3046,17 +3056,32 @@ class CtypesLinker(Linker):
|
|
3046
3056
|
def error_log(self):
|
3047
3057
|
return self.linker_errors_buf.value.decode("utf8")
|
3048
3058
|
|
3049
|
-
def
|
3050
|
-
|
3051
|
-
|
3052
|
-
|
3059
|
+
def add_cubin(self, cubin, name="<unnamed-cubin>"):
|
3060
|
+
return self._add_data(enums.CU_JIT_INPUT_CUBIN, cubin, name)
|
3061
|
+
|
3062
|
+
def add_ptx(self, ptx, name="<unnamed-ptx>"):
|
3063
|
+
return self._add_data(enums.CU_JIT_INPUT_PTX, ptx, name)
|
3064
|
+
|
3065
|
+
def add_object(self, object_, name="<unnamed-object>"):
|
3066
|
+
return self._add_data(enums.CU_JIT_INPUT_OBJECT, object_, name)
|
3067
|
+
|
3068
|
+
def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
|
3069
|
+
return self._add_data(enums.CU_JIT_INPUT_FATBINARY, fatbin, name)
|
3070
|
+
|
3071
|
+
def add_library(self, library, name="<unnamed-library>"):
|
3072
|
+
return self._add_data(enums.CU_JIT_INPUT_LIBRARY, library, name)
|
3073
|
+
|
3074
|
+
def _add_data(self, input_type, data, name):
|
3075
|
+
data_buffer = c_char_p(data)
|
3076
|
+
name_buffer = c_char_p(name.encode("utf8"))
|
3077
|
+
self._keep_alive += [data_buffer, name_buffer]
|
3053
3078
|
try:
|
3054
3079
|
driver.cuLinkAddData(
|
3055
3080
|
self.handle,
|
3056
|
-
|
3057
|
-
|
3058
|
-
len(
|
3059
|
-
|
3081
|
+
input_type,
|
3082
|
+
data_buffer,
|
3083
|
+
len(data),
|
3084
|
+
name_buffer,
|
3060
3085
|
0,
|
3061
3086
|
None,
|
3062
3087
|
None,
|
@@ -3064,6 +3089,28 @@ class CtypesLinker(Linker):
|
|
3064
3089
|
except CudaAPIError as e:
|
3065
3090
|
raise LinkerError("%s\n%s" % (e, self.error_log))
|
3066
3091
|
|
3092
|
+
def add_data(self, data, kind, name=None):
|
3093
|
+
# We pass the name as **kwargs to ensure the default name for the input
|
3094
|
+
# type is used if none is supplied
|
3095
|
+
kws = {}
|
3096
|
+
if name is not None:
|
3097
|
+
kws["name"] = name
|
3098
|
+
|
3099
|
+
if kind == FILE_EXTENSION_MAP["cubin"]:
|
3100
|
+
self.add_cubin(data, **kws)
|
3101
|
+
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3102
|
+
self.add_fatbin(data, **kws)
|
3103
|
+
elif kind == FILE_EXTENSION_MAP["a"]:
|
3104
|
+
self.add_library(data, **kws)
|
3105
|
+
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
3106
|
+
self.add_ptx(data, **kws)
|
3107
|
+
elif kind == FILE_EXTENSION_MAP["o"]:
|
3108
|
+
self.add_object(data, **kws)
|
3109
|
+
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3110
|
+
raise LinkerError("Ctypes linker cannot link LTO-IR")
|
3111
|
+
else:
|
3112
|
+
raise LinkerError(f"Don't know how to link {kind}")
|
3113
|
+
|
3067
3114
|
def add_file(self, path, kind):
|
3068
3115
|
pathbuf = c_char_p(path.encode("utf8"))
|
3069
3116
|
self._keep_alive.append(pathbuf)
|
@@ -3151,17 +3198,58 @@ class CudaPythonLinker(Linker):
|
|
3151
3198
|
def error_log(self):
|
3152
3199
|
return self.linker_errors_buf.decode("utf8")
|
3153
3200
|
|
3154
|
-
def
|
3155
|
-
|
3156
|
-
self.
|
3201
|
+
def add_cubin(self, cubin, name="<unnamed-cubin>"):
|
3202
|
+
input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN
|
3203
|
+
return self._add_data(input_type, cubin, name)
|
3204
|
+
|
3205
|
+
def add_ptx(self, ptx, name="<unnamed-ptx>"):
|
3206
|
+
input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX
|
3207
|
+
return self._add_data(input_type, ptx, name)
|
3208
|
+
|
3209
|
+
def add_object(self, object_, name="<unnamed-object>"):
|
3210
|
+
input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT
|
3211
|
+
return self._add_data(input_type, object_, name)
|
3212
|
+
|
3213
|
+
def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
|
3214
|
+
input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY
|
3215
|
+
return self._add_data(input_type, fatbin, name)
|
3216
|
+
|
3217
|
+
def add_library(self, library, name="<unnamed-library>"):
|
3218
|
+
input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY
|
3219
|
+
return self._add_data(input_type, library, name)
|
3220
|
+
|
3221
|
+
def _add_data(self, input_type, data, name):
|
3222
|
+
name_buffer = name.encode("utf8")
|
3223
|
+
self._keep_alive += [data, name_buffer]
|
3157
3224
|
try:
|
3158
|
-
input_ptx = binding.CUjitInputType.CU_JIT_INPUT_PTX
|
3159
3225
|
driver.cuLinkAddData(
|
3160
|
-
self.handle,
|
3226
|
+
self.handle, input_type, data, len(data), name_buffer, 0, [], []
|
3161
3227
|
)
|
3162
3228
|
except CudaAPIError as e:
|
3163
3229
|
raise LinkerError("%s\n%s" % (e, self.error_log))
|
3164
3230
|
|
3231
|
+
def add_data(self, data, kind, name=None):
|
3232
|
+
# We pass the name as **kwargs to ensure the default name for the input
|
3233
|
+
# type is used if none is supplied
|
3234
|
+
kws = {}
|
3235
|
+
if name is not None:
|
3236
|
+
kws["name"] = name
|
3237
|
+
|
3238
|
+
if kind == FILE_EXTENSION_MAP["cubin"]:
|
3239
|
+
self.add_cubin(data, **kws)
|
3240
|
+
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3241
|
+
self.add_fatbin(data, **kws)
|
3242
|
+
elif kind == FILE_EXTENSION_MAP["a"]:
|
3243
|
+
self.add_library(data, **kws)
|
3244
|
+
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
3245
|
+
self.add_ptx(data, **kws)
|
3246
|
+
elif kind == FILE_EXTENSION_MAP["o"]:
|
3247
|
+
self.add_object(data, **kws)
|
3248
|
+
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3249
|
+
raise LinkerError("CudaPythonLinker cannot link LTO-IR")
|
3250
|
+
else:
|
3251
|
+
raise LinkerError(f"Don't know how to link {kind}")
|
3252
|
+
|
3165
3253
|
def add_file(self, path, kind):
|
3166
3254
|
pathbuf = path.encode("utf8")
|
3167
3255
|
self._keep_alive.append(pathbuf)
|
@@ -3252,8 +3340,7 @@ class PyNvJitLinker(Linker):
|
|
3252
3340
|
|
3253
3341
|
def add_file(self, path, kind):
|
3254
3342
|
try:
|
3255
|
-
|
3256
|
-
data = f.read()
|
3343
|
+
data = cached_file_read(path, "rb")
|
3257
3344
|
except FileNotFoundError:
|
3258
3345
|
raise LinkerError(f"{path} not found")
|
3259
3346
|
|
@@ -16,16 +16,24 @@ class LinkableCode:
|
|
16
16
|
:param teardown_callback: A function called just prior to the unloading of
|
17
17
|
a module that has this code object linked into
|
18
18
|
it.
|
19
|
+
:param nrt: If True, assume this object contains NRT function calls and
|
20
|
+
add NRT source code to the final link.
|
19
21
|
"""
|
20
22
|
|
21
23
|
def __init__(
|
22
|
-
self,
|
24
|
+
self,
|
25
|
+
data,
|
26
|
+
name=None,
|
27
|
+
setup_callback=None,
|
28
|
+
teardown_callback=None,
|
29
|
+
nrt=False,
|
23
30
|
):
|
24
31
|
if setup_callback and not callable(setup_callback):
|
25
32
|
raise TypeError("setup_callback must be callable")
|
26
33
|
if teardown_callback and not callable(teardown_callback):
|
27
34
|
raise TypeError("teardown_callback must be callable")
|
28
35
|
|
36
|
+
self.nrt = nrt
|
29
37
|
self._name = name
|
30
38
|
self._data = data
|
31
39
|
self.setup_callback = setup_callback
|
@@ -87,5 +95,5 @@ class Object(LinkableCode):
|
|
87
95
|
class LTOIR(LinkableCode):
|
88
96
|
"""An LTOIR file in memory."""
|
89
97
|
|
90
|
-
kind = "ltoir"
|
98
|
+
kind = FILE_EXTENSION_MAP["ltoir"]
|
91
99
|
default_name = "<unnamed-ltoir>"
|