numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,98 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import abc
|
|
1
5
|
import os
|
|
6
|
+
from contextlib import contextmanager
|
|
2
7
|
|
|
3
8
|
from llvmlite import ir
|
|
4
|
-
from numba.core import types
|
|
9
|
+
from numba.core import types
|
|
10
|
+
from numba.cuda.core import config
|
|
5
11
|
from numba.cuda import cgutils
|
|
6
12
|
from numba.core.datamodel.models import ComplexModel, UnionModel, UniTupleModel
|
|
7
|
-
from numba.core.debuginfo import AbstractDIBuilder
|
|
8
13
|
from numba.cuda.types import GridGroup
|
|
9
14
|
|
|
15
|
+
|
|
16
|
+
@contextmanager
def suspend_emission(builder):
    """Temporarily switch off debug-metadata emission on ``builder``.

    While the ``with`` body runs, ``builder.debug_metadata`` is ``None``.
    The value it held on entry is put back on exit, including when the
    managed block raises.
    """
    saved_metadata = builder.debug_metadata
    builder.debug_metadata = None
    try:
        yield
    finally:
        # Restore unconditionally so an exception inside the managed block
        # cannot leave emission disabled.
        builder.debug_metadata = saved_metadata
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AbstractDIBuilder(metaclass=abc.ABCMeta):
    """Interface that debug-info builders implement.

    Every method below is abstract: a concrete subclass must override all
    five hooks before it can be instantiated.
    """

    @abc.abstractmethod
    def mark_variable(
        self,
        builder,
        allocavalue,
        name,
        lltype,
        size,
        line,
        datamodel=None,
        argidx=None,
    ):
        """Emit debug info for the variable."""

    @abc.abstractmethod
    def mark_location(self, builder, line):
        """Emit source location information to the given IRBuilder."""

    @abc.abstractmethod
    def mark_subprogram(self, function, qualname, argnames, argtypes, line):
        """Emit source location information for the given function."""

    @abc.abstractmethod
    def initialize(self):
        """Initialize the debug info.

        An opportunity for the debuginfo to prepare any necessary data
        structures.
        """

    @abc.abstractmethod
    def finalize(self):
        """Finalize the debuginfo by emitting all necessary metadata."""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class DummyDIBuilder(AbstractDIBuilder):
    """Debug-info builder in which every hook is a no-op.

    It satisfies the ``AbstractDIBuilder`` interface without emitting
    anything: all methods accept their arguments and return ``None``.
    """

    def __init__(self, module, filepath, cgctx, directives_only):
        """Accept the constructor arguments and discard them."""

    def mark_variable(
        self,
        builder,
        allocavalue,
        name,
        lltype,
        size,
        line,
        datamodel=None,
        argidx=None,
    ):
        """Do nothing for the variable."""

    def mark_location(self, builder, line):
        """Do nothing for the source location."""

    def mark_subprogram(self, function, qualname, argnames, argtypes, line):
        """Do nothing for the function."""

    def initialize(self):
        """Do nothing; there is no state to prepare."""

    def finalize(self):
        """Do nothing; there is no metadata to emit."""
|
|
94
|
+
|
|
95
|
+
|
|
10
96
|
_BYTE_SIZE = 8
|
|
11
97
|
|
|
12
98
|
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from warnings import warn
|
|
2
|
-
from numba.core import types
|
|
5
|
+
from numba.core import types
|
|
3
6
|
from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
|
4
7
|
from numba.cuda.compiler import declare_device_function
|
|
5
|
-
from numba.cuda.core import sigutils
|
|
8
|
+
from numba.cuda.core import sigutils, config
|
|
6
9
|
from numba.cuda.dispatcher import CUDADispatcher
|
|
7
10
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
|
8
11
|
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
|
@@ -196,7 +199,7 @@ def jit(
|
|
|
196
199
|
raise TypeError("CUDA kernel must have void return type.")
|
|
197
200
|
|
|
198
201
|
if device:
|
|
199
|
-
from numba.core import typeinfer
|
|
202
|
+
from numba.cuda.core import typeinfer
|
|
200
203
|
|
|
201
204
|
with typeinfer.register_dispatcher(disp):
|
|
202
205
|
disp.compile_device(argtypes, restype)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.core.options import TargetOptions
|
|
3
5
|
from .target import CUDATargetContext, CUDATypingContext
|
|
4
6
|
|
|
5
7
|
|
|
@@ -7,7 +9,7 @@ class CUDATargetOptions(TargetOptions):
|
|
|
7
9
|
pass
|
|
8
10
|
|
|
9
11
|
|
|
10
|
-
class CUDATarget
|
|
12
|
+
class CUDATarget:
|
|
11
13
|
def __init__(self, name):
|
|
12
14
|
self.options = CUDATargetOptions
|
|
13
15
|
# The typing and target contexts are initialized only when needed -
|
|
@@ -15,7 +17,7 @@ class CUDATarget(TargetDescriptor):
|
|
|
15
17
|
# systems that might not have them present.
|
|
16
18
|
self._typingctx = None
|
|
17
19
|
self._targetctx = None
|
|
18
|
-
|
|
20
|
+
self._target_name = name
|
|
19
21
|
|
|
20
22
|
@property
|
|
21
23
|
def typing_context(self):
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
Implements custom ufunc dispatch mechanism for non-CPU devices.
|
|
3
6
|
"""
|
|
@@ -7,14 +10,78 @@ from collections import OrderedDict
|
|
|
7
10
|
import operator
|
|
8
11
|
import warnings
|
|
9
12
|
from functools import reduce
|
|
13
|
+
import tokenize
|
|
14
|
+
import string
|
|
10
15
|
|
|
11
16
|
import numpy as np
|
|
12
17
|
|
|
13
|
-
from numba.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
|
|
18
|
+
from numba.cuda.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
|
|
14
19
|
from numba.core import types
|
|
15
20
|
from numba.core.typing import signature
|
|
16
21
|
from numba.cuda.core import sigutils
|
|
17
|
-
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def parse_signature(sig):
    """Parse a generalized ufunc signature such as ``"(m,n),(n,p)->(m,p)"``.

    Returns a pair ``(inputs, outputs)``; each is a list of tuples holding
    the dimension names of one parenthesised group.

    NOTE: ',' (COMMA) is a delimiter; not separator.
          This means trailing comma is legal.

    Raises ``ValueError`` for an unexpected token and ``NameError`` when an
    output group uses a symbol that no input group defines.
    """

    def _strip_whitespace(text):
        return "".join(ch for ch in text if ch not in string.whitespace)

    def _token_stream(src):
        # Hand the whole source over as a single "line"; the StopIteration
        # raised on the following read is how the tokenizer learns of EOF.
        pending = iter((src,))
        return tokenize.generate_tokens(lambda: next(pending))

    def _groups(src):
        stream = _token_stream(src)
        while True:
            kind, text = next(stream)[:2]
            if text == "(":
                names = []
                while True:
                    kind, text = next(stream)[:2]
                    if text == ")":
                        break
                    if kind == tokenize.NAME:
                        names.append(text)
                    elif text != ",":
                        raise ValueError('bad token in signature "%s"' % text)
                yield tuple(names)
                # Consume the token that follows the closing parenthesis:
                # a comma leads on to the next group, EOF ends the parse,
                # and anything else is left for the outer loop's next read.
                kind, text = next(stream)[:2]
                if text == ",":
                    continue
                if tokenize.ISEOF(kind):
                    break
            elif tokenize.ISEOF(kind):
                break
            else:
                raise ValueError('bad token in signature "%s"' % text)

    lhs, _, rhs = _strip_whitespace(sig).partition("->")
    inputs = list(_groups(lhs))
    outputs = list(_groups(rhs))

    # check that all output symbols are defined in the inputs
    defined = set().union(*inputs)
    undefined = set().union(*outputs) - defined
    if undefined:
        raise NameError(
            "undefined output symbols: %s" % ",".join(sorted(undefined))
        )

    return inputs, outputs
|
|
18
85
|
|
|
19
86
|
|
|
20
87
|
def _broadcast_axis(a, b):
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
import os
|
|
3
6
|
import sys
|
|
@@ -7,40 +10,42 @@ import functools
|
|
|
7
10
|
import types as pytypes
|
|
8
11
|
import weakref
|
|
9
12
|
import uuid
|
|
13
|
+
import re
|
|
14
|
+
from warnings import warn
|
|
10
15
|
|
|
11
|
-
from numba
|
|
12
|
-
|
|
13
|
-
from numba.
|
|
16
|
+
from numba import cuda, _dispatcher
|
|
17
|
+
|
|
18
|
+
from numba.core import types
|
|
14
19
|
from numba.core.compiler_lock import global_compiler_lock
|
|
15
20
|
from numba.core.dispatcher import _DispatcherBase
|
|
16
21
|
from numba.core.errors import NumbaPerformanceWarning, TypingError
|
|
17
|
-
from numba.cuda.typing.templates import fold_arguments
|
|
18
22
|
from numba.core.typing.typeof import Purpose, typeof
|
|
23
|
+
|
|
24
|
+
from numba.cuda import serialize, utils, typing
|
|
25
|
+
from numba.cuda import types as cuda_types
|
|
19
26
|
from numba.cuda.api import get_current_device
|
|
20
27
|
from numba.cuda.args import wrap_arg
|
|
21
28
|
from numba.cuda.compiler import (
|
|
22
29
|
compile_cuda,
|
|
23
30
|
CUDACompiler,
|
|
24
31
|
kernel_fixup,
|
|
32
|
+
compile_extra,
|
|
25
33
|
)
|
|
26
|
-
from numba.cuda.core import sigutils
|
|
27
|
-
import
|
|
34
|
+
from numba.cuda.core import sigutils, config
|
|
35
|
+
from numba.cuda.flags import Flags
|
|
28
36
|
from numba.cuda.cudadrv import driver, nvvm
|
|
29
|
-
from numba.cuda.
|
|
30
|
-
from numba.cuda.
|
|
37
|
+
from numba.cuda.locks import module_init_lock
|
|
38
|
+
from numba.cuda.core.caching import Cache, CacheImpl, NullCache
|
|
31
39
|
from numba.cuda.descriptor import cuda_target
|
|
32
40
|
from numba.cuda.errors import (
|
|
33
41
|
missing_launch_config_msg,
|
|
34
42
|
normalize_kernel_dimensions,
|
|
35
43
|
)
|
|
36
|
-
from numba.cuda import
|
|
37
|
-
from numba.cuda.
|
|
44
|
+
from numba.cuda.typing.templates import fold_arguments
|
|
45
|
+
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
|
46
|
+
from numba.cuda.cudadrv.devices import get_context
|
|
38
47
|
from numba.cuda.memory_management.nrt import rtsys, NRT_LIBRARY
|
|
39
48
|
|
|
40
|
-
from numba import cuda
|
|
41
|
-
from numba import _dispatcher
|
|
42
|
-
|
|
43
|
-
from warnings import warn
|
|
44
49
|
|
|
45
50
|
cuda_fp16_math_funcs = [
|
|
46
51
|
"hsin",
|
|
@@ -208,13 +213,11 @@ class _Kernel(serialize.ReduceMixin):
|
|
|
208
213
|
# The following are referred to by the cache implementation. Note:
|
|
209
214
|
# - There are no referenced environments in CUDA.
|
|
210
215
|
# - Kernels don't have lifted code.
|
|
211
|
-
# - reload_init is only for parfors.
|
|
212
216
|
self.target_context = tgt_ctx
|
|
213
217
|
self.fndesc = cres.fndesc
|
|
214
218
|
self.environment = cres.environment
|
|
215
219
|
self._referenced_environments = []
|
|
216
220
|
self.lifted = []
|
|
217
|
-
self.reload_init = []
|
|
218
221
|
|
|
219
222
|
def maybe_link_nrt(self, link, tgt_ctx, asm):
|
|
220
223
|
"""
|
|
@@ -832,12 +835,12 @@ class _FunctionCompiler(object):
|
|
|
832
835
|
return True, retval
|
|
833
836
|
|
|
834
837
|
def _compile_core(self, args, return_type):
|
|
835
|
-
flags =
|
|
838
|
+
flags = Flags()
|
|
836
839
|
self.targetdescr.options.parse_as_flags(flags, self.targetoptions)
|
|
837
840
|
flags = self._customize_flags(flags)
|
|
838
841
|
|
|
839
842
|
impl = self._get_implementation(args, {})
|
|
840
|
-
cres =
|
|
843
|
+
cres = compile_extra(
|
|
841
844
|
self.targetdescr.typing_context,
|
|
842
845
|
self.targetdescr.target_context,
|
|
843
846
|
impl,
|
|
@@ -1342,27 +1345,6 @@ class CUDADispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
|
|
|
1342
1345
|
cache_misses=self._cache_misses,
|
|
1343
1346
|
)
|
|
1344
1347
|
|
|
1345
|
-
def parallel_diagnostics(self, signature=None, level=1):
|
|
1346
|
-
"""
|
|
1347
|
-
Print parallel diagnostic information for the given signature. If no
|
|
1348
|
-
signature is present it is printed for all known signatures. level is
|
|
1349
|
-
used to adjust the verbosity, level=1 (default) is minimal verbosity,
|
|
1350
|
-
and 2, 3, and 4 provide increasing levels of verbosity.
|
|
1351
|
-
"""
|
|
1352
|
-
|
|
1353
|
-
def dump(sig):
|
|
1354
|
-
ol = self.overloads[sig]
|
|
1355
|
-
pfdiag = ol.metadata.get("parfor_diagnostics", None)
|
|
1356
|
-
if pfdiag is None:
|
|
1357
|
-
msg = "No parfors diagnostic available, is 'parallel=True' set?"
|
|
1358
|
-
raise ValueError(msg)
|
|
1359
|
-
pfdiag.dump(level)
|
|
1360
|
-
|
|
1361
|
-
if signature is not None:
|
|
1362
|
-
dump(signature)
|
|
1363
|
-
else:
|
|
1364
|
-
[dump(sig) for sig in self.signatures]
|
|
1365
|
-
|
|
1366
1348
|
def get_metadata(self, signature=None):
|
|
1367
1349
|
"""
|
|
1368
1350
|
Obtain the compilation metadata for a given signature.
|
numba_cuda/numba/cuda/errors.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numbers
|
|
2
5
|
from numba.core.errors import LoweringError
|
|
3
6
|
|
|
@@ -12,6 +15,13 @@ class KernelRuntimeError(RuntimeError):
|
|
|
12
15
|
super(KernelRuntimeError, self).__init__(msg)
|
|
13
16
|
|
|
14
17
|
|
|
18
|
+
class UnsupportedBytecodeError(Exception):
|
|
19
|
+
"""Unsupported bytecode is non-recoverable"""
|
|
20
|
+
|
|
21
|
+
def __init__(self, msg, loc=None):
|
|
22
|
+
super().__init__(f"{msg}. Raised from {loc}")
|
|
23
|
+
|
|
24
|
+
|
|
15
25
|
class CudaLoweringError(LoweringError):
|
|
16
26
|
pass
|
|
17
27
|
|
numba_cuda/numba/cuda/flags.py
CHANGED
|
@@ -1,4 +1,146 @@
|
|
|
1
|
-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.core.targetconfig import TargetConfig, Option
|
|
5
|
+
|
|
6
|
+
from numba.cuda.core.options import (
|
|
7
|
+
ParallelOptions,
|
|
8
|
+
FastMathOptions,
|
|
9
|
+
InlineOptions,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Flags(TargetConfig):
|
|
14
|
+
__slots__ = ()
|
|
15
|
+
|
|
16
|
+
enable_looplift = Option(
|
|
17
|
+
type=bool,
|
|
18
|
+
default=False,
|
|
19
|
+
doc="Enable loop-lifting",
|
|
20
|
+
)
|
|
21
|
+
enable_pyobject = Option(
|
|
22
|
+
type=bool,
|
|
23
|
+
default=False,
|
|
24
|
+
doc="Enable pyobject mode (in general)",
|
|
25
|
+
)
|
|
26
|
+
enable_pyobject_looplift = Option(
|
|
27
|
+
type=bool,
|
|
28
|
+
default=False,
|
|
29
|
+
doc="Enable pyobject mode inside lifted loops",
|
|
30
|
+
)
|
|
31
|
+
enable_ssa = Option(
|
|
32
|
+
type=bool,
|
|
33
|
+
default=True,
|
|
34
|
+
doc="Enable SSA",
|
|
35
|
+
)
|
|
36
|
+
force_pyobject = Option(
|
|
37
|
+
type=bool,
|
|
38
|
+
default=False,
|
|
39
|
+
doc="Force pyobject mode inside the whole function",
|
|
40
|
+
)
|
|
41
|
+
release_gil = Option(
|
|
42
|
+
type=bool,
|
|
43
|
+
default=False,
|
|
44
|
+
doc="Release GIL inside the native function",
|
|
45
|
+
)
|
|
46
|
+
no_compile = Option(
|
|
47
|
+
type=bool,
|
|
48
|
+
default=False,
|
|
49
|
+
doc="TODO",
|
|
50
|
+
)
|
|
51
|
+
debuginfo = Option(
|
|
52
|
+
type=bool,
|
|
53
|
+
default=False,
|
|
54
|
+
doc="TODO",
|
|
55
|
+
)
|
|
56
|
+
boundscheck = Option(
|
|
57
|
+
type=bool,
|
|
58
|
+
default=False,
|
|
59
|
+
doc="TODO",
|
|
60
|
+
)
|
|
61
|
+
forceinline = Option(
|
|
62
|
+
type=bool,
|
|
63
|
+
default=False,
|
|
64
|
+
doc="Force inlining of the function. Overrides _dbg_optnone.",
|
|
65
|
+
)
|
|
66
|
+
no_cpython_wrapper = Option(
|
|
67
|
+
type=bool,
|
|
68
|
+
default=False,
|
|
69
|
+
doc="TODO",
|
|
70
|
+
)
|
|
71
|
+
no_cfunc_wrapper = Option(
|
|
72
|
+
type=bool,
|
|
73
|
+
default=False,
|
|
74
|
+
doc="TODO",
|
|
75
|
+
)
|
|
76
|
+
auto_parallel = Option(
|
|
77
|
+
type=ParallelOptions,
|
|
78
|
+
default=ParallelOptions(False),
|
|
79
|
+
doc="""Enable automatic parallel optimization, can be fine-tuned by
|
|
80
|
+
taking a dictionary of sub-options instead of a boolean, see parfor.py for
|
|
81
|
+
detail""",
|
|
82
|
+
)
|
|
83
|
+
nrt = Option(
|
|
84
|
+
type=bool,
|
|
85
|
+
default=False,
|
|
86
|
+
doc="TODO",
|
|
87
|
+
)
|
|
88
|
+
no_rewrites = Option(
|
|
89
|
+
type=bool,
|
|
90
|
+
default=False,
|
|
91
|
+
doc="TODO",
|
|
92
|
+
)
|
|
93
|
+
error_model = Option(
|
|
94
|
+
type=str,
|
|
95
|
+
default="python",
|
|
96
|
+
doc="TODO",
|
|
97
|
+
)
|
|
98
|
+
fastmath = Option(
|
|
99
|
+
type=FastMathOptions,
|
|
100
|
+
default=FastMathOptions(False),
|
|
101
|
+
doc="TODO",
|
|
102
|
+
)
|
|
103
|
+
noalias = Option(
|
|
104
|
+
type=bool,
|
|
105
|
+
default=False,
|
|
106
|
+
doc="TODO",
|
|
107
|
+
)
|
|
108
|
+
inline = Option(
|
|
109
|
+
type=InlineOptions,
|
|
110
|
+
default=InlineOptions("never"),
|
|
111
|
+
doc="TODO",
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
dbg_extend_lifetimes = Option(
|
|
115
|
+
type=bool,
|
|
116
|
+
default=False,
|
|
117
|
+
doc=(
|
|
118
|
+
"Extend variable lifetime for debugging. "
|
|
119
|
+
"This automatically turns on with debug=True."
|
|
120
|
+
),
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
dbg_optnone = Option(
|
|
124
|
+
type=bool,
|
|
125
|
+
default=False,
|
|
126
|
+
doc=(
|
|
127
|
+
"Disable optimization for debug. "
|
|
128
|
+
"Equivalent to adding optnone attribute in the LLVM Function."
|
|
129
|
+
),
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
dbg_directives_only = Option(
|
|
133
|
+
type=bool,
|
|
134
|
+
default=False,
|
|
135
|
+
doc=(
|
|
136
|
+
"Make debug emissions directives-only. "
|
|
137
|
+
"Used when generating lineinfo."
|
|
138
|
+
),
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
DEFAULT_FLAGS = Flags()
|
|
143
|
+
DEFAULT_FLAGS.nrt = True
|
|
2
144
|
|
|
3
145
|
|
|
4
146
|
def _nvvm_options_type(x):
|
numba_cuda/numba/cuda/fp16.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numba.core.types as types
|
|
2
5
|
from numba.cuda._internal.cuda_fp16 import (
|
|
3
6
|
typing_registry,
|
|
@@ -70,7 +73,6 @@ from numba.cuda._internal.cuda_fp16 import (
|
|
|
70
73
|
__hadd,
|
|
71
74
|
__hadd_rn,
|
|
72
75
|
__hadd_sat,
|
|
73
|
-
__hcmadd,
|
|
74
76
|
__hdiv as hdiv,
|
|
75
77
|
__hdiv,
|
|
76
78
|
__heq as heq,
|
|
@@ -284,7 +286,6 @@ __all__ = [
|
|
|
284
286
|
"__hadd",
|
|
285
287
|
"__hadd_rn",
|
|
286
288
|
"__hadd_sat",
|
|
287
|
-
"__hcmadd",
|
|
288
289
|
"hdiv",
|
|
289
290
|
"__hdiv",
|
|
290
291
|
"heq",
|