numba-cuda 0.18.1__py3-none-any.whl → 0.19.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +5 -2
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +4 -1
- numba_cuda/numba/cuda/api.py +5 -7
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +3 -0
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +3 -0
- numba_cuda/numba/cuda/codegen.py +3 -0
- numba_cuda/numba/cuda/compiler.py +10 -4
- numba_cuda/numba/cuda/core/caching.py +3 -0
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +3 -0
- numba_cuda/numba/cuda/core/interpreter.py +3595 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2644 -0
- numba_cuda/numba/cuda/core/sigutils.py +58 -0
- numba_cuda/numba/cuda/core/typed_passes.py +3 -0
- numba_cuda/numba/cuda/cuda_paths.py +12 -17
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +7 -19
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +4 -2
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +3 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +47 -44
- numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -18
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +15 -1
- numba_cuda/numba/cuda/cudaimpl.py +3 -0
- numba_cuda/numba/cuda/cudamath.py +4 -1
- numba_cuda/numba/cuda/debuginfo.py +3 -0
- numba_cuda/numba/cuda/decorators.py +7 -3
- numba_cuda/numba/cuda/descriptor.py +3 -0
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +5 -1
- numba_cuda/numba/cuda/dispatcher.py +6 -2
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +4 -1
- numba_cuda/numba/cuda/flags.py +2 -0
- numba_cuda/numba/cuda/fp16.py +3 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +4 -1
- numba_cuda/numba/cuda/libdevicefuncs.py +4 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +53 -16
- numba_cuda/numba/cuda/mathimpl.py +3 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +5 -1
- numba_cuda/numba/cuda/models.py +3 -0
- numba_cuda/numba/cuda/nvvmutils.py +3 -0
- numba_cuda/numba/cuda/printimpl.py +3 -0
- numba_cuda/numba/cuda/random.py +3 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +3 -0
- numba_cuda/numba/cuda/simulator/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -7
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +3 -0
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +4 -2
- numba_cuda/numba/cuda/testing.py +7 -6
- numba_cuda/numba/cuda/tests/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +7 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +149 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -4
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +23 -284
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +298 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +8 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +4 -1
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +4 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
- numba_cuda/numba/cuda/tests/support.py +755 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +6 -3
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +6 -2
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/types.py +3 -0
- numba_cuda/numba/cuda/typing/__init__.py +11 -0
- numba_cuda/numba/cuda/typing/templates.py +1448 -0
- numba_cuda/numba/cuda/ufuncs.py +3 -0
- numba_cuda/numba/cuda/utils.py +3 -0
- numba_cuda/numba/cuda/vector_types.py +6 -3
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/METADATA +25 -29
- numba_cuda-0.19.1.dist-info/RECORD +302 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.19.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
- numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
- numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
- numba_cuda-0.18.1.dist-info/RECORD +0 -296
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.core import types, typing
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def is_signature(sig):
    """
    Tell whether *sig* could be a signature specification accepted by
    user-facing APIs.

    Only the type of *sig* is inspected: strings, tuples and
    ``typing.Signature`` instances qualify. The contents of *sig* are not
    validated here.
    """
    accepted_kinds = (str, tuple, typing.Signature)
    return isinstance(sig, accepted_kinds)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_signature_string(signature_str):
    """
    Evaluate a textual signature and return the resulting object.

    Parameters
    ----------
    signature_str : str
        A Python expression. Names in it are looked up among the
        attributes of the ``types`` module, which is supplied as the
        local namespace of the evaluation (the globals are empty).
    """
    # NOTE(review): eval() executes arbitrary expressions, and builtins stay
    # reachable even with an empty globals dict -- only pass trusted strings.
    type_namespace = types.__dict__
    return eval(signature_str, {}, type_namespace)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def normalize_signature(sig):
    """
    Convert a signature specification into an ``(args, return_type)`` pair.

    *sig* may be a string (evaluated through ``_parse_signature_string``),
    a tuple of argument types, or a ``typing.Signature``. ``args`` is taken
    from the tuple itself or from the Signature's ``args`` attribute;
    ``return_type`` is None when a bare tuple was supplied.

    Raises ``TypeError`` when the (parsed) specification is neither a tuple
    nor a Signature, or when the return type or any argument is not a
    ``types.Type`` instance.
    """
    parsed = _parse_signature_string(sig) if isinstance(sig, str) else sig

    if isinstance(parsed, tuple):
        # A bare tuple only lists argument types; no return type is given.
        args = parsed
        return_type = None
    elif isinstance(parsed, typing.Signature):
        args = parsed.args
        return_type = parsed.return_type
    else:
        raise TypeError(
            "invalid signature: %r (type: %r) evaluates to %r "
            "instead of tuple or Signature"
            % (sig, sig.__class__.__name__, parsed.__class__.__name__)
        )

    bad_type_msg = (
        "invalid type in signature: expected a type "
        "instance, got %r"
    )

    # The return type (when present) is validated before the arguments.
    if return_type is not None and not isinstance(return_type, types.Type):
        raise TypeError(bad_type_msg % (return_type,))
    for arg_type in args:
        if not isinstance(arg_type, types.Type):
            raise TypeError(bad_type_msg % (arg_type,))

    return args, return_type
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import sys
|
|
2
5
|
import re
|
|
3
6
|
import os
|
|
@@ -148,7 +151,6 @@ def get_nvrtc_dso_path():
|
|
|
148
151
|
# Check for each version of the NVRTC DLL, preferring the most
|
|
149
152
|
# recent.
|
|
150
153
|
versions = (
|
|
151
|
-
"112" if IS_WIN32 else "11.2",
|
|
152
154
|
"120" if IS_WIN32 else "12",
|
|
153
155
|
"130" if IS_WIN32 else "13",
|
|
154
156
|
)
|
|
@@ -303,16 +305,16 @@ def get_nvidia_nvvm_ctk():
|
|
|
303
305
|
|
|
304
306
|
# Assume the existence of NVVM in the conda env implies that a CUDA toolkit
|
|
305
307
|
# conda package is installed.
|
|
308
|
+
if IS_WIN32:
|
|
309
|
+
# The path used on Windows
|
|
310
|
+
libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
|
|
311
|
+
else:
|
|
312
|
+
# The path used on Linux is different to that on Windows
|
|
313
|
+
libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
|
|
306
314
|
|
|
307
|
-
# First, try the location used on Linux and the Windows 11.x packages
|
|
308
|
-
libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
|
|
309
315
|
if not os.path.exists(libdir) or not os.path.isdir(libdir):
|
|
310
|
-
# If
|
|
311
|
-
|
|
312
|
-
if not os.path.exists(libdir) or not os.path.isdir(libdir):
|
|
313
|
-
# If that doesn't exist either, assume we don't have the NVIDIA
|
|
314
|
-
# conda package
|
|
315
|
-
return
|
|
316
|
+
# If the path doesn't exist, we didn't find the NVIDIA conda package
|
|
317
|
+
return
|
|
316
318
|
|
|
317
319
|
paths = find_lib("nvvm", libdir=libdir)
|
|
318
320
|
if not paths:
|
|
@@ -346,15 +348,8 @@ def get_nvidia_static_cudalib_ctk():
|
|
|
346
348
|
if not nvvm_ctk:
|
|
347
349
|
return
|
|
348
350
|
|
|
349
|
-
if IS_WIN32 and ("Library" not in nvvm_ctk):
|
|
350
|
-
# Location specific to CUDA 11.x packages on Windows
|
|
351
|
-
dirs = ("Lib", "x64")
|
|
352
|
-
else:
|
|
353
|
-
# Linux, or Windows with CUDA 12.x packages
|
|
354
|
-
dirs = ("lib",)
|
|
355
|
-
|
|
356
351
|
env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
|
|
357
|
-
return os.path.join(env_dir,
|
|
352
|
+
return os.path.join(env_dir, "lib")
|
|
358
353
|
|
|
359
354
|
|
|
360
355
|
def get_cuda_home(*subdirs):
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba.core import errors, types
|
|
2
5
|
from numba.core.typing.npydecl import (
|
|
3
6
|
parse_dtype,
|
|
@@ -9,7 +12,7 @@ from numba.core.typing.npydecl import (
|
|
|
9
12
|
math_operations,
|
|
10
13
|
bit_twiddling_functions,
|
|
11
14
|
)
|
|
12
|
-
from numba.
|
|
15
|
+
from numba.cuda.typing.templates import (
|
|
13
16
|
AttributeTemplate,
|
|
14
17
|
ConcreteTemplate,
|
|
15
18
|
AbstractTemplate,
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
A CUDA ND Array is recognized by checking the __cuda_memory__ attribute
|
|
3
6
|
on the object. If it exists and evaluate to True, it must define shape,
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
CUDA driver bridge implementation
|
|
3
6
|
|
|
@@ -54,12 +57,6 @@ from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
|
|
|
54
57
|
from numba.cuda.utils import cached_file_read
|
|
55
58
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
|
56
59
|
|
|
57
|
-
try:
|
|
58
|
-
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
|
59
|
-
except ImportError:
|
|
60
|
-
NvJitLinker, NvJitLinkError = None, None
|
|
61
|
-
|
|
62
|
-
|
|
63
60
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
|
64
61
|
|
|
65
62
|
if USE_NV_BINDING:
|
|
@@ -640,7 +637,7 @@ class Device(object):
|
|
|
640
637
|
|
|
641
638
|
if USE_NV_BINDING:
|
|
642
639
|
buf = driver.cuDeviceGetName(bufsz, self.id)
|
|
643
|
-
name = buf.
|
|
640
|
+
name = buf.split(b"\x00")[0]
|
|
644
641
|
else:
|
|
645
642
|
buf = (c_char * bufsz)()
|
|
646
643
|
driver.cuDeviceGetName(buf, bufsz, self.id)
|
|
@@ -2808,19 +2805,10 @@ class _LinkerBase(metaclass=ABCMeta):
|
|
|
2808
2805
|
lto=None,
|
|
2809
2806
|
additional_flags=None,
|
|
2810
2807
|
):
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
|
2814
|
-
linker = MVCLinker
|
|
2815
|
-
elif USE_NV_BINDING:
|
|
2816
|
-
linker = _Linker
|
|
2817
|
-
else:
|
|
2818
|
-
linker = CtypesLinker
|
|
2808
|
+
if USE_NV_BINDING:
|
|
2809
|
+
linker = _Linker
|
|
2819
2810
|
else:
|
|
2820
|
-
|
|
2821
|
-
linker = _Linker
|
|
2822
|
-
else:
|
|
2823
|
-
linker = CtypesLinker
|
|
2811
|
+
linker = CtypesLinker
|
|
2824
2812
|
|
|
2825
2813
|
params = (max_registers, lineinfo, cc)
|
|
2826
2814
|
if linker is _Linker:
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""CUDA Toolkit libraries lookup utilities.
|
|
2
5
|
|
|
3
6
|
CUDA Toolkit libraries can be available via either:
|
|
4
7
|
|
|
5
|
-
- the `cuda-nvcc` and `cuda-nvrtc` conda packages
|
|
6
|
-
- the `cudatoolkit` conda package for CUDA 11,
|
|
8
|
+
- the `cuda-nvcc` and `cuda-nvrtc` conda packages,
|
|
7
9
|
- a user supplied location from CUDA_HOME,
|
|
8
10
|
- a system wide location,
|
|
9
11
|
- package-specific locations (e.g. the Debian NVIDIA packages),
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
|
|
2
5
|
from enum import IntEnum
|
|
3
6
|
from numba.cuda.cudadrv.error import (
|
|
@@ -29,6 +32,7 @@ nvrtc_program = c_void_p
|
|
|
29
32
|
nvrtc_result = c_int
|
|
30
33
|
|
|
31
34
|
if config.CUDA_USE_NVIDIA_BINDING:
|
|
35
|
+
from cuda.bindings import nvrtc as bindings_nvrtc
|
|
32
36
|
from cuda.core.experimental import Program, ProgramOptions
|
|
33
37
|
|
|
34
38
|
|
|
@@ -142,6 +146,10 @@ class NVRTC:
|
|
|
142
146
|
|
|
143
147
|
def __new__(cls):
|
|
144
148
|
with _nvrtc_lock:
|
|
149
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
150
|
+
raise RuntimeError(
|
|
151
|
+
"NVRTC objects should not be used with cuda-python bindings"
|
|
152
|
+
)
|
|
145
153
|
if cls.__INSTANCE is None:
|
|
146
154
|
from numba.cuda.cudadrv.libs import open_cudalib
|
|
147
155
|
|
|
@@ -154,16 +162,9 @@ class NVRTC:
|
|
|
154
162
|
|
|
155
163
|
# Find & populate functions
|
|
156
164
|
for name, proto in inst._PROTOTYPES.items():
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
func.argtypes = proto[1:]
|
|
161
|
-
except AttributeError:
|
|
162
|
-
if "LTOIR" in name:
|
|
163
|
-
# CUDA 11 does not have LTOIR functions; ignore
|
|
164
|
-
continue
|
|
165
|
-
else:
|
|
166
|
-
raise
|
|
165
|
+
func = getattr(lib, name)
|
|
166
|
+
func.restype = proto[0]
|
|
167
|
+
func.argtypes = proto[1:]
|
|
167
168
|
|
|
168
169
|
@functools.wraps(func)
|
|
169
170
|
def checked_call(*args, func=func, name=name):
|
|
@@ -303,32 +304,35 @@ def compile(src, name, cc, ltoir=False):
|
|
|
303
304
|
:return: The compiled PTX and compilation log
|
|
304
305
|
:rtype: tuple
|
|
305
306
|
"""
|
|
306
|
-
nvrtc = NVRTC()
|
|
307
|
-
program = nvrtc.create_program(src, name)
|
|
308
307
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
raise RuntimeError(
|
|
313
|
-
"Unsupported CUDA version. CUDA 11.2 or higher is required."
|
|
314
|
-
)
|
|
315
|
-
else:
|
|
316
|
-
supported_arch = nvrtc.get_supported_archs()
|
|
317
|
-
try:
|
|
318
|
-
found = max(filter(lambda v: v <= cc, [v for v in supported_arch]))
|
|
319
|
-
except ValueError:
|
|
308
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
309
|
+
retcode, *version = bindings_nvrtc.nvrtcVersion()
|
|
310
|
+
if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
|
|
320
311
|
raise RuntimeError(
|
|
321
|
-
f"
|
|
322
|
-
f"minimum supported by NVRTC {ver_str(version)}. Supported "
|
|
323
|
-
"compute capabilities are "
|
|
324
|
-
f"{', '.join([ver_str(v) for v in supported_arch])}."
|
|
312
|
+
f"{retcode.name} when calling nvrtcGetSupportedArchs()"
|
|
325
313
|
)
|
|
314
|
+
version = tuple(version)
|
|
315
|
+
else:
|
|
316
|
+
nvrtc = NVRTC()
|
|
317
|
+
version = nvrtc.get_version()
|
|
326
318
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
319
|
+
ver_str = lambda version: ".".join(str(v) for v in version)
|
|
320
|
+
supported_ccs = get_supported_ccs()
|
|
321
|
+
try:
|
|
322
|
+
found = max(filter(lambda v: v <= cc, [v for v in supported_ccs]))
|
|
323
|
+
except ValueError:
|
|
324
|
+
raise RuntimeError(
|
|
325
|
+
f"Device compute capability {ver_str(cc)} is less than the "
|
|
326
|
+
f"minimum supported by NVRTC {ver_str(version)}. Supported "
|
|
327
|
+
"compute capabilities are "
|
|
328
|
+
f"{', '.join([ver_str(v) for v in supported_ccs])}."
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
if found != cc:
|
|
332
|
+
warnings.warn(
|
|
333
|
+
f"Device compute capability {ver_str(cc)} is not supported by "
|
|
334
|
+
f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
|
|
335
|
+
)
|
|
332
336
|
|
|
333
337
|
# Compilation options:
|
|
334
338
|
# - Compile for the current device's compute capability.
|
|
@@ -348,16 +352,10 @@ def compile(src, name, cc, ltoir=False):
|
|
|
348
352
|
f"{os.path.join(cuda_include_dir, 'cccl')}",
|
|
349
353
|
]
|
|
350
354
|
|
|
351
|
-
nvrtc_version = nvrtc.get_version()
|
|
352
|
-
nvrtc_ver_major = nvrtc_version[0]
|
|
353
|
-
|
|
354
355
|
cudadrv_path = os.path.dirname(os.path.abspath(__file__))
|
|
355
356
|
numba_cuda_path = os.path.dirname(cudadrv_path)
|
|
356
357
|
|
|
357
|
-
|
|
358
|
-
numba_include = f"{os.path.join(numba_cuda_path, 'include', '11')}"
|
|
359
|
-
else:
|
|
360
|
-
numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
|
|
358
|
+
numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
|
|
361
359
|
|
|
362
360
|
if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
|
|
363
361
|
extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
|
|
@@ -373,7 +371,6 @@ def compile(src, name, cc, ltoir=False):
|
|
|
373
371
|
arch=arch,
|
|
374
372
|
include_path=includes,
|
|
375
373
|
relocatable_device_code=True,
|
|
376
|
-
std="c++17" if nvrtc_version < (12, 0) else None,
|
|
377
374
|
link_time_optimization=ltoir,
|
|
378
375
|
name=name,
|
|
379
376
|
)
|
|
@@ -399,6 +396,7 @@ def compile(src, name, cc, ltoir=False):
|
|
|
399
396
|
return result, log
|
|
400
397
|
|
|
401
398
|
else:
|
|
399
|
+
program = nvrtc.create_program(src, name)
|
|
402
400
|
includes = [f"-I{path}" for path in includes]
|
|
403
401
|
options = [
|
|
404
402
|
arch,
|
|
@@ -410,9 +408,6 @@ def compile(src, name, cc, ltoir=False):
|
|
|
410
408
|
if ltoir:
|
|
411
409
|
options.append("-dlto")
|
|
412
410
|
|
|
413
|
-
if nvrtc_version < (12, 0):
|
|
414
|
-
options.append("-std=c++17")
|
|
415
|
-
|
|
416
411
|
# Compile the program
|
|
417
412
|
compile_error = nvrtc.compile_program(program, options)
|
|
418
413
|
|
|
@@ -482,4 +477,12 @@ def get_lowest_supported_cc():
|
|
|
482
477
|
|
|
483
478
|
|
|
484
479
|
def get_supported_ccs():
    """
    Return the compute capabilities supported by NVRTC.

    When ``config.CUDA_USE_NVIDIA_BINDING`` is set, the list is queried via
    ``bindings_nvrtc.nvrtcGetSupportedArchs()`` and each integer arch value
    is split into a ``(arch // 10, arch % 10)`` tuple. Otherwise the result
    of ``NVRTC().get_supported_archs()`` is returned unchanged.

    Raises ``RuntimeError`` if the binding call does not report
    ``NVRTC_SUCCESS``.
    """
    if not config.CUDA_USE_NVIDIA_BINDING:
        return NVRTC().get_supported_archs()

    status, arch_values = bindings_nvrtc.nvrtcGetSupportedArchs()
    if status != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
        raise RuntimeError(
            f"{status.name} when calling nvrtcGetSupportedArchs()"
        )

    supported = []
    for value in arch_values:
        # presumably e.g. 75 -> (7, 5), i.e. (major, minor) -- confirm
        supported.append((value // 10, value % 10))
    return supported
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
This is a direct translation of nvvm.h
|
|
3
6
|
"""
|
|
@@ -47,14 +50,7 @@ NVVM_ERROR_COMPILATION
|
|
|
47
50
|
for i, k in enumerate(RESULT_CODE_NAMES):
|
|
48
51
|
setattr(sys.modules[__name__], k, i)
|
|
49
52
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
_datalayout_original = (
|
|
53
|
-
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
|
|
54
|
-
"i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
|
|
55
|
-
"v64:64:64-v128:128:128-n16:32:64"
|
|
56
|
-
)
|
|
57
|
-
_datalayout_i128 = (
|
|
53
|
+
_datalayout = (
|
|
58
54
|
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
|
|
59
55
|
"i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
|
|
60
56
|
"v64:64:64-v128:128:128-n16:32:64"
|
|
@@ -182,10 +178,7 @@ class NVVM(object):
|
|
|
182
178
|
|
|
183
179
|
@property
|
|
184
180
|
def data_layout(self):
|
|
185
|
-
|
|
186
|
-
return _datalayout_original
|
|
187
|
-
else:
|
|
188
|
-
return _datalayout_i128
|
|
181
|
+
return _datalayout
|
|
189
182
|
|
|
190
183
|
def get_version(self):
|
|
191
184
|
major = c_int()
|
|
@@ -346,14 +339,9 @@ class CompilationUnit(object):
|
|
|
346
339
|
|
|
347
340
|
|
|
348
341
|
MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file.
|
|
349
|
-
|
|
350
|
-
For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
|
|
342
|
+
``cuda-nvcc`` and ``cuda-nvrtc`` are required:
|
|
351
343
|
|
|
352
344
|
$ conda install -c conda-forge cuda-nvcc cuda-nvrtc "cuda-version>=12.0"
|
|
353
|
-
|
|
354
|
-
For CUDA 11, ``cudatoolkit`` is required:
|
|
355
|
-
|
|
356
|
-
$ conda install -c conda-forge cudatoolkit "cuda-version>=11.2,<12.0"
|
|
357
345
|
"""
|
|
358
346
|
|
|
359
347
|
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
Former CUDA Runtime wrapper.
|
|
3
6
|
|
|
@@ -5,12 +8,23 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
|
|
|
5
8
|
to the runtime anymore. This file is provided to maintain the existing API.
|
|
6
9
|
"""
|
|
7
10
|
|
|
11
|
+
from numba import config
|
|
8
12
|
from numba.cuda.cudadrv.nvrtc import NVRTC
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class Runtime:
|
|
12
16
|
def get_version(self):
|
|
13
|
-
|
|
17
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
18
|
+
from cuda.bindings import nvrtc
|
|
19
|
+
|
|
20
|
+
retcode, *version = nvrtc.nvrtcVersion()
|
|
21
|
+
if retcode != nvrtc.nvrtcResult.NVRTC_SUCCESS:
|
|
22
|
+
raise RuntimeError(
|
|
23
|
+
f"{retcode.name} when calling nvrtcGetVersion()"
|
|
24
|
+
)
|
|
25
|
+
return tuple(version)
|
|
26
|
+
else:
|
|
27
|
+
return NVRTC().get_version()
|
|
14
28
|
|
|
15
29
|
|
|
16
30
|
runtime = Runtime()
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import math
|
|
2
5
|
from numba.core import types
|
|
3
|
-
from numba.core.typing.templates import ConcreteTemplate, signature, Registry
|
|
6
|
+
from numba.cuda.typing.templates import ConcreteTemplate, signature, Registry
|
|
4
7
|
|
|
5
8
|
|
|
6
9
|
registry = Registry()
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from warnings import warn
|
|
2
|
-
from numba.core import types, config, sigutils
|
|
5
|
+
from numba.core import types, config
|
|
3
6
|
from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
|
4
7
|
from numba.cuda.compiler import declare_device_function
|
|
8
|
+
from numba.cuda.core import sigutils
|
|
5
9
|
from numba.cuda.dispatcher import CUDADispatcher
|
|
6
10
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
|
7
11
|
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
|
@@ -86,7 +90,7 @@ def jit(
|
|
|
86
90
|
number of threads per block.
|
|
87
91
|
:type launch_bounds: int | tuple[int]
|
|
88
92
|
:param lto: Whether to enable LTO. If unspecified, LTO is enabled by
|
|
89
|
-
default when pynvjitlink is available, except for kernels where
|
|
93
|
+
default when nvjitlink is available, except for kernels where
|
|
90
94
|
``debug=True``.
|
|
91
95
|
:type lto: bool
|
|
92
96
|
"""
|
|
@@ -143,7 +147,7 @@ def jit(
|
|
|
143
147
|
raise ValueError("link keyword invalid for device function")
|
|
144
148
|
|
|
145
149
|
if lto is None:
|
|
146
|
-
# Default to using LTO if pynvjitlink is available and we're not debugging
|
|
150
|
+
# Default to using LTO if nvjitlink is available and we're not debugging
|
|
147
151
|
lto = _have_nvjitlink() and not debug
|
|
148
152
|
else:
|
|
149
153
|
if lto and not _have_nvjitlink():
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba.core.descriptors import TargetDescriptor
|
|
2
5
|
from numba.core.options import TargetOptions
|
|
3
6
|
from .target import CUDATargetContext, CUDATypingContext
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
Implements custom ufunc dispatch mechanism for non-CPU devices.
|
|
3
6
|
"""
|
|
@@ -11,8 +14,9 @@ from functools import reduce
|
|
|
11
14
|
import numpy as np
|
|
12
15
|
|
|
13
16
|
from numba.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
|
|
14
|
-
from numba.core import types, sigutils
|
|
17
|
+
from numba.core import types
|
|
15
18
|
from numba.core.typing import signature
|
|
19
|
+
from numba.cuda.core import sigutils
|
|
16
20
|
from numba.np.ufunc.sigparse import parse_signature
|
|
17
21
|
|
|
18
22
|
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
import os
|
|
3
6
|
import sys
|
|
@@ -8,13 +11,13 @@ import types as pytypes
|
|
|
8
11
|
import weakref
|
|
9
12
|
import uuid
|
|
10
13
|
|
|
11
|
-
from numba.core import compiler, sigutils, types, typing, config
|
|
14
|
+
from numba.core import compiler, types, typing, config
|
|
12
15
|
from numba.cuda import serialize, utils
|
|
13
16
|
from numba.cuda.core.caching import Cache, CacheImpl, NullCache
|
|
14
17
|
from numba.core.compiler_lock import global_compiler_lock
|
|
15
18
|
from numba.core.dispatcher import _DispatcherBase
|
|
16
19
|
from numba.core.errors import NumbaPerformanceWarning, TypingError
|
|
17
|
-
from numba.core.typing.templates import fold_arguments
|
|
20
|
+
from numba.cuda.typing.templates import fold_arguments
|
|
18
21
|
from numba.core.typing.typeof import Purpose, typeof
|
|
19
22
|
from numba.cuda.api import get_current_device
|
|
20
23
|
from numba.cuda.args import wrap_arg
|
|
@@ -23,6 +26,7 @@ from numba.cuda.compiler import (
|
|
|
23
26
|
CUDACompiler,
|
|
24
27
|
kernel_fixup,
|
|
25
28
|
)
|
|
29
|
+
from numba.cuda.core import sigutils
|
|
26
30
|
import re
|
|
27
31
|
from numba.cuda.cudadrv import driver, nvvm
|
|
28
32
|
from numba.cuda.cudadrv.linkable_code import LinkableCode
|