numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba.core import errors, types
|
|
2
|
-
from numba.
|
|
5
|
+
from numba.cuda.typing.npydecl import (
|
|
3
6
|
parse_dtype,
|
|
4
7
|
parse_shape,
|
|
5
8
|
register_number_classes,
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""CUDA Driver
|
|
2
5
|
|
|
3
6
|
- Driver API binding
|
|
@@ -6,6 +9,6 @@
|
|
|
6
9
|
|
|
7
10
|
"""
|
|
8
11
|
|
|
9
|
-
from numba.core import config
|
|
12
|
+
from numba.cuda.core import config
|
|
10
13
|
|
|
11
14
|
assert not config.ENABLE_CUDASIM, "Cannot use real driver API with simulator"
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
A CUDA ND Array is recognized by checking the __cuda_memory__ attribute
|
|
3
6
|
on the object. If it exists and evaluate to True, it must define shape,
|
|
@@ -16,7 +19,8 @@ import numba
|
|
|
16
19
|
from numba import _devicearray
|
|
17
20
|
from numba.cuda.cudadrv import devices, dummyarray
|
|
18
21
|
from numba.cuda.cudadrv import driver as _driver
|
|
19
|
-
from numba.core import types
|
|
22
|
+
from numba.core import types
|
|
23
|
+
from numba.cuda.core import config
|
|
20
24
|
from numba.np.unsafe.ndarray import to_fixed_tuple
|
|
21
25
|
from numba.np.numpy_support import numpy_version
|
|
22
26
|
from numba.np import numpy_support
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
CUDA driver bridge implementation
|
|
3
6
|
|
|
@@ -44,7 +47,7 @@ from collections import namedtuple, deque
|
|
|
44
47
|
|
|
45
48
|
|
|
46
49
|
from numba import mviewbuf
|
|
47
|
-
from numba.core import config
|
|
50
|
+
from numba.cuda.core import config
|
|
48
51
|
from numba.cuda import utils, serialize
|
|
49
52
|
from .error import CudaSupportError, CudaDriverError
|
|
50
53
|
from .drvapi import API_PROTOTYPES
|
|
@@ -79,12 +82,6 @@ _py_incref = ctypes.pythonapi.Py_IncRef
|
|
|
79
82
|
_py_decref.argtypes = [ctypes.py_object]
|
|
80
83
|
_py_incref.argtypes = [ctypes.py_object]
|
|
81
84
|
|
|
82
|
-
|
|
83
|
-
_MVC_ERROR_MESSAGE = (
|
|
84
|
-
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
|
85
|
-
"to be available"
|
|
86
|
-
)
|
|
87
|
-
|
|
88
85
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
|
89
86
|
|
|
90
87
|
if USE_NV_BINDING:
|
|
@@ -134,7 +131,7 @@ def _have_nvjitlink():
|
|
|
134
131
|
nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion")
|
|
135
132
|
!= 0
|
|
136
133
|
)
|
|
137
|
-
except NotSupportedError:
|
|
134
|
+
except (RuntimeError, NotSupportedError):
|
|
138
135
|
# no driver
|
|
139
136
|
return False
|
|
140
137
|
|
|
@@ -158,12 +155,6 @@ class CudaAPIError(CudaDriverError):
|
|
|
158
155
|
|
|
159
156
|
|
|
160
157
|
def locate_driver_and_loader():
|
|
161
|
-
envpath = config.CUDA_DRIVER
|
|
162
|
-
|
|
163
|
-
if envpath == "0":
|
|
164
|
-
# Force fail
|
|
165
|
-
_raise_driver_not_found()
|
|
166
|
-
|
|
167
158
|
# Determine DLL type
|
|
168
159
|
if sys.platform == "win32":
|
|
169
160
|
dlloader = ctypes.WinDLL
|
|
@@ -179,26 +170,11 @@ def locate_driver_and_loader():
|
|
|
179
170
|
dldir = ["/usr/lib", "/usr/lib64"]
|
|
180
171
|
dlnames = ["libcuda.so", "libcuda.so.1"]
|
|
181
172
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
"NUMBA_CUDA_DRIVER %s is not a valid path" % envpath
|
|
188
|
-
)
|
|
189
|
-
if not os.path.isfile(envpath):
|
|
190
|
-
raise ValueError(
|
|
191
|
-
"NUMBA_CUDA_DRIVER %s is not a valid file "
|
|
192
|
-
"path. Note it must be a filepath of the .so/"
|
|
193
|
-
".dll/.dylib or the driver" % envpath
|
|
194
|
-
)
|
|
195
|
-
candidates = [envpath]
|
|
196
|
-
else:
|
|
197
|
-
# First search for the name in the default library path.
|
|
198
|
-
# If that is not found, try the specific path.
|
|
199
|
-
candidates = dlnames + [
|
|
200
|
-
os.path.join(x, y) for x, y in product(dldir, dlnames)
|
|
201
|
-
]
|
|
173
|
+
# First search for the name in the default library path.
|
|
174
|
+
# If that is not found, try specific common paths.
|
|
175
|
+
candidates = dlnames + [
|
|
176
|
+
os.path.join(x, y) for x, y in product(dldir, dlnames)
|
|
177
|
+
]
|
|
202
178
|
|
|
203
179
|
return dlloader, candidates
|
|
204
180
|
|
|
@@ -234,9 +210,7 @@ def find_driver():
|
|
|
234
210
|
|
|
235
211
|
DRIVER_NOT_FOUND_MSG = """
|
|
236
212
|
CUDA driver library cannot be found.
|
|
237
|
-
|
|
238
|
-
try setting environment variable NUMBA_CUDA_DRIVER
|
|
239
|
-
with the file path of the CUDA driver shared library.
|
|
213
|
+
Ensure that a compatible NVIDIA driver is installed and available on your system path.
|
|
240
214
|
"""
|
|
241
215
|
|
|
242
216
|
DRIVER_LOAD_ERROR_MSG = """
|
|
@@ -2839,10 +2813,7 @@ class _LinkerBase(metaclass=ABCMeta):
|
|
|
2839
2813
|
def add_cu(self, cu, name):
|
|
2840
2814
|
"""Add CUDA source in a string to the link. The name of the source
|
|
2841
2815
|
file should be specified in `name`."""
|
|
2842
|
-
|
|
2843
|
-
dev = driver.get_device(ac.devnum)
|
|
2844
|
-
cc = dev.compute_capability
|
|
2845
|
-
ptx, log = nvrtc.compile(cu, name, cc)
|
|
2816
|
+
ptx, log = nvrtc.compile(cu, name, self.cc)
|
|
2846
2817
|
|
|
2847
2818
|
if config.DUMP_ASSEMBLY:
|
|
2848
2819
|
print(("ASSEMBLY %s" % name).center(80, "-"))
|
|
@@ -3006,10 +2977,7 @@ class _Linker(_LinkerBase):
|
|
|
3006
2977
|
self._object_codes.append(obj)
|
|
3007
2978
|
|
|
3008
2979
|
def add_cu(self, cu, name="<cudapy-cu>"):
|
|
3009
|
-
|
|
3010
|
-
dev = driver.get_device(ac.devnum)
|
|
3011
|
-
cc = dev.compute_capability
|
|
3012
|
-
obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
|
|
2980
|
+
obj, log = nvrtc.compile(cu, name, self.cc, ltoir=self.lto)
|
|
3013
2981
|
|
|
3014
2982
|
if not self.lto and config.DUMP_ASSEMBLY:
|
|
3015
2983
|
print(("ASSEMBLY %s" % name).center(80, "-"))
|
|
@@ -3096,101 +3064,6 @@ class _Linker(_LinkerBase):
|
|
|
3096
3064
|
return result
|
|
3097
3065
|
|
|
3098
3066
|
|
|
3099
|
-
class MVCLinker(_LinkerBase):
|
|
3100
|
-
"""
|
|
3101
|
-
Linker supporting Minor Version Compatibility, backed by the cubinlinker
|
|
3102
|
-
package.
|
|
3103
|
-
"""
|
|
3104
|
-
|
|
3105
|
-
def __init__(self, max_registers=None, lineinfo=False, cc=None):
|
|
3106
|
-
try:
|
|
3107
|
-
from cubinlinker import CubinLinker
|
|
3108
|
-
except ImportError as err:
|
|
3109
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3110
|
-
|
|
3111
|
-
if cc is None:
|
|
3112
|
-
raise RuntimeError(
|
|
3113
|
-
"MVCLinker requires Compute Capability to be "
|
|
3114
|
-
"specified, but cc is None"
|
|
3115
|
-
)
|
|
3116
|
-
|
|
3117
|
-
super().__init__(max_registers, lineinfo, cc)
|
|
3118
|
-
|
|
3119
|
-
arch = f"sm_{cc[0] * 10 + cc[1]}"
|
|
3120
|
-
ptx_compile_opts = ["--gpu-name", arch, "-c"]
|
|
3121
|
-
if max_registers:
|
|
3122
|
-
arg = f"--maxrregcount={max_registers}"
|
|
3123
|
-
ptx_compile_opts.append(arg)
|
|
3124
|
-
if lineinfo:
|
|
3125
|
-
ptx_compile_opts.append("--generate-line-info")
|
|
3126
|
-
self.ptx_compile_options = tuple(ptx_compile_opts)
|
|
3127
|
-
|
|
3128
|
-
self._linker = CubinLinker(f"--arch={arch}")
|
|
3129
|
-
|
|
3130
|
-
@property
|
|
3131
|
-
def info_log(self):
|
|
3132
|
-
return self._linker.info_log
|
|
3133
|
-
|
|
3134
|
-
@property
|
|
3135
|
-
def error_log(self):
|
|
3136
|
-
return self._linker.error_log
|
|
3137
|
-
|
|
3138
|
-
def add_ptx(self, ptx, name="<cudapy-ptx>"):
|
|
3139
|
-
try:
|
|
3140
|
-
from ptxcompiler import compile_ptx
|
|
3141
|
-
from cubinlinker import CubinLinkerError
|
|
3142
|
-
except ImportError as err:
|
|
3143
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3144
|
-
compile_result = compile_ptx(ptx.decode(), self.ptx_compile_options)
|
|
3145
|
-
try:
|
|
3146
|
-
self._linker.add_cubin(compile_result.compiled_program, name)
|
|
3147
|
-
except CubinLinkerError as e:
|
|
3148
|
-
raise LinkerError from e
|
|
3149
|
-
|
|
3150
|
-
def add_data(self, data, kind, name):
|
|
3151
|
-
msg = "Adding in-memory data unsupported in the MVC linker"
|
|
3152
|
-
raise LinkerError(msg)
|
|
3153
|
-
|
|
3154
|
-
def add_file(self, path, kind):
|
|
3155
|
-
try:
|
|
3156
|
-
from cubinlinker import CubinLinkerError
|
|
3157
|
-
except ImportError as err:
|
|
3158
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3159
|
-
|
|
3160
|
-
try:
|
|
3161
|
-
data = cached_file_read(path, how="rb")
|
|
3162
|
-
except FileNotFoundError:
|
|
3163
|
-
raise LinkerError(f"{path} not found")
|
|
3164
|
-
|
|
3165
|
-
name = pathlib.Path(path).name
|
|
3166
|
-
if kind == FILE_EXTENSION_MAP["cubin"]:
|
|
3167
|
-
fn = self._linker.add_cubin
|
|
3168
|
-
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
|
3169
|
-
fn = self._linker.add_fatbin
|
|
3170
|
-
elif kind == FILE_EXTENSION_MAP["a"]:
|
|
3171
|
-
raise LinkerError(f"Don't know how to link {kind}")
|
|
3172
|
-
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
|
3173
|
-
return self.add_ptx(data, name)
|
|
3174
|
-
else:
|
|
3175
|
-
raise LinkerError(f"Don't know how to link {kind}")
|
|
3176
|
-
|
|
3177
|
-
try:
|
|
3178
|
-
fn(data, name)
|
|
3179
|
-
except CubinLinkerError as e:
|
|
3180
|
-
raise LinkerError from e
|
|
3181
|
-
|
|
3182
|
-
def complete(self):
|
|
3183
|
-
try:
|
|
3184
|
-
from cubinlinker import CubinLinkerError
|
|
3185
|
-
except ImportError as err:
|
|
3186
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3187
|
-
|
|
3188
|
-
try:
|
|
3189
|
-
return self._linker.complete()
|
|
3190
|
-
except CubinLinkerError as e:
|
|
3191
|
-
raise LinkerError from e
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
3067
|
class CtypesLinker(_LinkerBase):
|
|
3195
3068
|
"""
|
|
3196
3069
|
Links for current device if no CC given
|
|
@@ -3215,6 +3088,7 @@ class CtypesLinker(_LinkerBase):
|
|
|
3215
3088
|
if lineinfo:
|
|
3216
3089
|
options[enums.CU_JIT_GENERATE_LINE_INFO] = c_void_p(1)
|
|
3217
3090
|
|
|
3091
|
+
self.cc = cc
|
|
3218
3092
|
if cc is None:
|
|
3219
3093
|
# No option value is needed, but we need something as a placeholder
|
|
3220
3094
|
options[enums.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
|
|
@@ -1,26 +1,122 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from collections import namedtuple
|
|
2
5
|
import itertools
|
|
3
6
|
import functools
|
|
4
7
|
import operator
|
|
5
|
-
import ctypes
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
8
|
|
|
9
|
-
from numba import _helperlib
|
|
10
9
|
|
|
11
10
|
Extent = namedtuple("Extent", ["begin", "end"])
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
12
|
+
|
|
13
|
+
def attempt_nocopy_reshape(
    nd, dims, strides, newnd, newdims, newstrides, itemsize, is_f_order
):
    """
    Attempt to reshape an array without copying data.

    This function should correctly handle all reshapes, including
    axes of length 1. Zero strides should work but are untested.

    Parameters
    ----------
    nd : int
        Number of axes of the existing array.
    dims, strides : indexable of int
        Extents and strides of the existing array; the first ``nd``
        entries are read.
    newnd : int
        Number of axes of the requested shape.
    newdims : indexable of int
        Extents of the requested shape; the first ``newnd`` entries are
        read.
    newstrides : mutable indexable of int
        Output buffer with at least ``newnd`` slots. It is filled in place
        when the reshape succeeds; it may be partially written when the
        function returns 0.
    itemsize : int
        Stride used for zero-sized arrays and for shapes consisting only
        of length-1 axes.
    is_f_order : bool
        True to interpret the reshape in Fortran (column-major) order,
        False for C (row-major) order.

    Returns
    -------
    int
        0 if a copy is needed, 1 if no copy is needed (in which case
        `newstrides` holds the appropriate strides).
    """
    # Remove axes with dimension 1 from the old array. They have no effect
    # but would need special cases since their strides do not matter.
    kept_axes = [axis for axis in range(nd) if dims[axis] != 1]
    olddims = [dims[axis] for axis in kept_axes]
    oldstrides = [strides[axis] for axis in kept_axes]
    oldnd = len(kept_axes)

    # Calculate total sizes
    new_total = 1
    for axis in range(newnd):
        new_total *= newdims[axis]

    old_total = 1
    for extent in olddims:
        old_total *= extent

    if new_total != old_total:
        # Different total sizes; no hope
        return 0

    if new_total == 0:
        # Handle zero-sized arrays. Just make the strides vaguely
        # reasonable (they can have any value in theory).
        for axis in range(newnd):
            newstrides[axis] = itemsize
        return 1

    # oi to oj and ni to nj give the axis ranges currently worked with
    oi, oj = 0, 1
    ni, nj = 0, 1

    while ni < newnd and oi < oldnd:
        new_prod = newdims[ni]
        old_prod = olddims[oi]

        # Grow whichever group is smaller until both cover the same number
        # of elements. The totals are equal and non-zero, so this always
        # terminates before running off either shape.
        while new_prod != old_prod:
            if new_prod < old_prod:
                # Misses trailing 1s, these are handled later
                new_prod *= newdims[nj]
                nj += 1
            else:
                old_prod *= olddims[oj]
                oj += 1

        # Check whether the original axes can be combined
        for ok in range(oi, oj - 1):
            if is_f_order:
                expected = olddims[ok] * oldstrides[ok]
                actual = oldstrides[ok + 1]
            else:
                # C order
                expected = olddims[ok + 1] * oldstrides[ok + 1]
                actual = oldstrides[ok]
            if actual != expected:
                # not contiguous enough
                return 0

        # Calculate new strides for all axes currently worked with
        if is_f_order:
            newstrides[ni] = oldstrides[oi]
            for nk in range(ni + 1, nj):
                newstrides[nk] = newstrides[nk - 1] * newdims[nk - 1]
        else:
            # C order
            newstrides[nj - 1] = oldstrides[oj - 1]
            for nk in range(nj - 1, ni, -1):
                newstrides[nk - 1] = newstrides[nk] * newdims[nk]

        ni, oi = nj, oj
        nj += 1
        oj += 1

    # Set strides corresponding to trailing 1s of the new shape
    if ni >= 1:
        last_stride = newstrides[ni - 1]
        if is_f_order:
            last_stride *= newdims[ni - 1]
    else:
        # No new axis was matched against an old one, so every new extent
        # is 1 (or there are no new axes at all). There is no previous axis
        # to scale by: indexing newdims[ni - 1] here would wrap around to
        # newdims[-1] and raise IndexError for an empty shape.
        last_stride = itemsize

    for nk in range(ni, newnd):
        newstrides[nk] = last_stride

    return 1
|
|
24
120
|
|
|
25
121
|
|
|
26
122
|
class Dim(object):
|
|
@@ -330,18 +426,12 @@ class Array(object):
|
|
|
330
426
|
else:
|
|
331
427
|
raise AssertionError("unreachable")
|
|
332
428
|
else:
|
|
333
|
-
newstrides =
|
|
334
|
-
|
|
335
|
-
# need to keep these around in variables, not temporaries, so they
|
|
336
|
-
# don't get GC'ed before we call into the C code
|
|
337
|
-
olddims = np.array(self.shape, dtype=np.ctypeslib.c_intp)
|
|
338
|
-
oldstrides = np.array(self.strides, dtype=np.ctypeslib.c_intp)
|
|
339
|
-
newdims = np.array(newdims, dtype=np.ctypeslib.c_intp)
|
|
429
|
+
newstrides = [0] * newnd
|
|
340
430
|
|
|
341
431
|
if not attempt_nocopy_reshape(
|
|
342
432
|
oldnd,
|
|
343
|
-
|
|
344
|
-
|
|
433
|
+
self.shape,
|
|
434
|
+
self.strides,
|
|
345
435
|
newnd,
|
|
346
436
|
newdims,
|
|
347
437
|
newstrides,
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""CUDA Toolkit libraries lookup utilities.
|
|
2
5
|
|
|
3
6
|
CUDA Toolkit libraries can be available via either:
|
|
@@ -13,11 +16,11 @@ import os
|
|
|
13
16
|
import sys
|
|
14
17
|
import ctypes
|
|
15
18
|
|
|
16
|
-
from numba.misc.findlib import find_lib
|
|
19
|
+
from numba.cuda.misc.findlib import find_lib
|
|
17
20
|
from numba.cuda.cuda_paths import get_cuda_paths
|
|
18
21
|
from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
|
|
19
22
|
from numba.cuda.cudadrv.error import CudaSupportError
|
|
20
|
-
from numba.core import config
|
|
23
|
+
from numba.cuda.core import config
|
|
21
24
|
|
|
22
25
|
|
|
23
26
|
if sys.platform == "win32":
|
|
@@ -51,9 +54,9 @@ def get_cudalib(lib, static=False):
|
|
|
51
54
|
"""
|
|
52
55
|
if lib in {"nvrtc", "nvvm"}:
|
|
53
56
|
return get_cuda_paths()[lib].info or _dllnamepattern % lib
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
+
|
|
58
|
+
dir_type = "static_cudalib_dir" if static else "cudalib_dir"
|
|
59
|
+
libdir = get_cuda_paths()[dir_type].info
|
|
57
60
|
|
|
58
61
|
candidates = find_lib(lib, libdir, static=static)
|
|
59
62
|
namepattern = _staticnamepattern if static else _dllnamepattern
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
|
|
2
5
|
from enum import IntEnum
|
|
3
6
|
from numba.cuda.cudadrv.error import (
|
|
@@ -7,7 +10,7 @@ from numba.cuda.cudadrv.error import (
|
|
|
7
10
|
NvrtcCompilationError,
|
|
8
11
|
NvrtcSupportError,
|
|
9
12
|
)
|
|
10
|
-
from numba import config
|
|
13
|
+
from numba.cuda import config
|
|
11
14
|
from numba.cuda.cuda_paths import get_cuda_paths
|
|
12
15
|
from numba.cuda.utils import _readenv
|
|
13
16
|
|
|
@@ -18,8 +21,8 @@ import warnings
|
|
|
18
21
|
|
|
19
22
|
NVRTC_EXTRA_SEARCH_PATHS = _readenv(
|
|
20
23
|
"NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
|
|
21
|
-
) or getattr(config, "
|
|
22
|
-
if not hasattr(config, "
|
|
24
|
+
) or getattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
|
|
25
|
+
if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
|
|
23
26
|
config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
|
|
24
27
|
|
|
25
28
|
# Opaque handle for compilation unit
|
|
@@ -344,15 +347,26 @@ def compile(src, name, cc, ltoir=False):
|
|
|
344
347
|
arch = f"--gpu-architecture=compute_{major}{minor}"
|
|
345
348
|
|
|
346
349
|
cuda_include_dir = get_cuda_paths()["include_dir"].info
|
|
347
|
-
cuda_includes = [
|
|
348
|
-
f"{cuda_include_dir}",
|
|
349
|
-
f"{os.path.join(cuda_include_dir, 'cccl')}",
|
|
350
|
-
]
|
|
350
|
+
cuda_includes = [f"{cuda_include_dir}"]
|
|
351
351
|
|
|
352
352
|
cudadrv_path = os.path.dirname(os.path.abspath(__file__))
|
|
353
353
|
numba_cuda_path = os.path.dirname(cudadrv_path)
|
|
354
354
|
|
|
355
|
-
|
|
355
|
+
nvrtc_ver_major = version[0]
|
|
356
|
+
if nvrtc_ver_major == 12:
|
|
357
|
+
numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
|
|
358
|
+
# For CUDA 12 wheels, `cuda_include_dir` is `site-packages/nvidia/cuda_runtime/include`
|
|
359
|
+
# We need to find CCCL at `site-packages/nvidia/cuda_cccl/include`
|
|
360
|
+
# For CUDA 12 conda / system install, CCCL is just in the `include` directory
|
|
361
|
+
cuda_includes.append(
|
|
362
|
+
f"{os.path.join(cuda_include_dir, '..', '..', 'cuda_cccl', 'include')}"
|
|
363
|
+
)
|
|
364
|
+
elif nvrtc_ver_major == 13:
|
|
365
|
+
numba_include = f"{os.path.join(numba_cuda_path, 'include', '13')}"
|
|
366
|
+
# For CUDA 13 wheels, `cuda_include_dir` is `site-packages/nvidia/cu13/include`
|
|
367
|
+
# We need to find CCCL at `site-packages/nvidia/cu13/include/cccl`
|
|
368
|
+
# For CUDA 13 conda / system install, CCCL is in the `include/cccl` directory
|
|
369
|
+
cuda_includes.append(f"{os.path.join(cuda_include_dir, 'cccl')}")
|
|
356
370
|
|
|
357
371
|
if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
|
|
358
372
|
extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
This is a direct translation of nvvm.h
|
|
3
6
|
"""
|
|
@@ -151,10 +154,7 @@ class NVVM(object):
|
|
|
151
154
|
inst.driver = open_cudalib("nvvm")
|
|
152
155
|
except OSError as e:
|
|
153
156
|
cls.__INSTANCE = None
|
|
154
|
-
errmsg =
|
|
155
|
-
"libNVVM cannot be found. Do `conda install "
|
|
156
|
-
"cudatoolkit`:\n%s"
|
|
157
|
-
)
|
|
157
|
+
errmsg = "libNVVM cannot be found. Please install the cuda-toolkit conda package:\n%s"
|
|
158
158
|
raise NvvmSupportError(errmsg % e)
|
|
159
159
|
|
|
160
160
|
# Find & populate functions
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
Former CUDA Runtime wrapper.
|
|
3
6
|
|
|
@@ -5,7 +8,7 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
|
|
|
5
8
|
to the runtime anymore. This file is provided to maintain the existing API.
|
|
6
9
|
"""
|
|
7
10
|
|
|
8
|
-
from numba import config
|
|
11
|
+
from numba.cuda import config
|
|
9
12
|
from numba.cuda.cudadrv.nvrtc import NVRTC
|
|
10
13
|
|
|
11
14
|
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from functools import reduce
|
|
2
5
|
import operator
|
|
3
6
|
import math
|
|
@@ -7,7 +10,7 @@ from llvmlite import ir
|
|
|
7
10
|
import llvmlite.binding as ll
|
|
8
11
|
|
|
9
12
|
from numba.core.imputils import Registry
|
|
10
|
-
from numba.
|
|
13
|
+
from numba.cuda.typing.npydecl import parse_dtype
|
|
11
14
|
from numba.core.datamodel import models
|
|
12
15
|
from numba.core import types
|
|
13
16
|
from numba.cuda import cgutils
|
|
@@ -22,6 +25,10 @@ registry = Registry()
|
|
|
22
25
|
lower = registry.lower
|
|
23
26
|
lower_attr = registry.lower_getattr
|
|
24
27
|
lower_constant = registry.lower_constant
|
|
28
|
+
lower_getattr_generic = registry.lower_getattr_generic
|
|
29
|
+
lower_setattr = registry.lower_setattr
|
|
30
|
+
lower_setattr_generic = registry.lower_setattr_generic
|
|
31
|
+
lower_cast = registry.lower_cast
|
|
25
32
|
|
|
26
33
|
|
|
27
34
|
def initialize_dim3(builder, prefix):
|