numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,274 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import functools
|
|
5
|
+
import warnings
|
|
2
6
|
import numpy as np
|
|
3
7
|
import unittest
|
|
4
8
|
|
|
5
|
-
from numba import
|
|
9
|
+
from numba import cuda, types, njit, typeof
|
|
10
|
+
from numba.cuda import config
|
|
11
|
+
from numba.np import numpy_support
|
|
6
12
|
from numba.cuda.tests.support import TestCase
|
|
7
|
-
from numba.tests.
|
|
13
|
+
from numba.cuda.tests.support import MemoryLeakMixin
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseUFuncTest(MemoryLeakMixin):
|
|
17
|
+
def setUp(self):
|
|
18
|
+
super(BaseUFuncTest, self).setUp()
|
|
19
|
+
self.inputs = [
|
|
20
|
+
(np.uint32(0), types.uint32),
|
|
21
|
+
(np.uint32(1), types.uint32),
|
|
22
|
+
(np.int32(-1), types.int32),
|
|
23
|
+
(np.int32(0), types.int32),
|
|
24
|
+
(np.int32(1), types.int32),
|
|
25
|
+
(np.uint64(0), types.uint64),
|
|
26
|
+
(np.uint64(1), types.uint64),
|
|
27
|
+
(np.int64(-1), types.int64),
|
|
28
|
+
(np.int64(0), types.int64),
|
|
29
|
+
(np.int64(1), types.int64),
|
|
30
|
+
(np.float32(-0.5), types.float32),
|
|
31
|
+
(np.float32(0.0), types.float32),
|
|
32
|
+
(np.float32(0.5), types.float32),
|
|
33
|
+
(np.float64(-0.5), types.float64),
|
|
34
|
+
(np.float64(0.0), types.float64),
|
|
35
|
+
(np.float64(0.5), types.float64),
|
|
36
|
+
(np.array([0, 1], dtype="u4"), types.Array(types.uint32, 1, "C")),
|
|
37
|
+
(np.array([0, 1], dtype="u8"), types.Array(types.uint64, 1, "C")),
|
|
38
|
+
(
|
|
39
|
+
np.array([-1, 0, 1], dtype="i4"),
|
|
40
|
+
types.Array(types.int32, 1, "C"),
|
|
41
|
+
),
|
|
42
|
+
(
|
|
43
|
+
np.array([-1, 0, 1], dtype="i8"),
|
|
44
|
+
types.Array(types.int64, 1, "C"),
|
|
45
|
+
),
|
|
46
|
+
(
|
|
47
|
+
np.array([-0.5, 0.0, 0.5], dtype="f4"),
|
|
48
|
+
types.Array(types.float32, 1, "C"),
|
|
49
|
+
),
|
|
50
|
+
(
|
|
51
|
+
np.array([-0.5, 0.0, 0.5], dtype="f8"),
|
|
52
|
+
types.Array(types.float64, 1, "C"),
|
|
53
|
+
),
|
|
54
|
+
(np.array([0, 1], dtype=np.int8), types.Array(types.int8, 1, "C")),
|
|
55
|
+
(
|
|
56
|
+
np.array([0, 1], dtype=np.int16),
|
|
57
|
+
types.Array(types.int16, 1, "C"),
|
|
58
|
+
),
|
|
59
|
+
(
|
|
60
|
+
np.array([0, 1], dtype=np.uint8),
|
|
61
|
+
types.Array(types.uint8, 1, "C"),
|
|
62
|
+
),
|
|
63
|
+
(
|
|
64
|
+
np.array([0, 1], dtype=np.uint16),
|
|
65
|
+
types.Array(types.uint16, 1, "C"),
|
|
66
|
+
),
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
@functools.lru_cache(maxsize=None)
|
|
70
|
+
def _compile(self, pyfunc, args, nrt=False):
|
|
71
|
+
# NOTE: to test the implementation of Numpy ufuncs, we disable
|
|
72
|
+
# rewriting of array expressions.
|
|
73
|
+
return njit(args, _nrt=nrt, no_rewrites=True)(pyfunc)
|
|
74
|
+
|
|
75
|
+
def _determine_output_type(
|
|
76
|
+
self, input_type, int_output_type=None, float_output_type=None
|
|
77
|
+
):
|
|
78
|
+
ty = input_type
|
|
79
|
+
if isinstance(ty, types.Array):
|
|
80
|
+
ndim = ty.ndim
|
|
81
|
+
ty = ty.dtype
|
|
82
|
+
else:
|
|
83
|
+
ndim = 1
|
|
84
|
+
|
|
85
|
+
if ty in types.signed_domain:
|
|
86
|
+
if int_output_type:
|
|
87
|
+
output_type = types.Array(int_output_type, ndim, "C")
|
|
88
|
+
else:
|
|
89
|
+
output_type = types.Array(ty, ndim, "C")
|
|
90
|
+
elif ty in types.unsigned_domain:
|
|
91
|
+
if int_output_type:
|
|
92
|
+
output_type = types.Array(int_output_type, ndim, "C")
|
|
93
|
+
else:
|
|
94
|
+
output_type = types.Array(ty, ndim, "C")
|
|
95
|
+
else:
|
|
96
|
+
if float_output_type:
|
|
97
|
+
output_type = types.Array(float_output_type, ndim, "C")
|
|
98
|
+
else:
|
|
99
|
+
output_type = types.Array(ty, ndim, "C")
|
|
100
|
+
return output_type
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class BasicUFuncTest(BaseUFuncTest):
|
|
104
|
+
def _make_ufunc_usecase(self, ufunc):
|
|
105
|
+
return _make_ufunc_usecase(ufunc)
|
|
106
|
+
|
|
107
|
+
def basic_ufunc_test(
|
|
108
|
+
self,
|
|
109
|
+
ufunc,
|
|
110
|
+
skip_inputs=[],
|
|
111
|
+
additional_inputs=[],
|
|
112
|
+
int_output_type=None,
|
|
113
|
+
float_output_type=None,
|
|
114
|
+
kinds="ifc",
|
|
115
|
+
positive_only=False,
|
|
116
|
+
):
|
|
117
|
+
# Necessary to avoid some Numpy warnings being silenced, despite
|
|
118
|
+
# the simplefilter() call below.
|
|
119
|
+
self.reset_module_warnings(__name__)
|
|
120
|
+
|
|
121
|
+
pyfunc = self._make_ufunc_usecase(ufunc)
|
|
122
|
+
|
|
123
|
+
inputs = list(self.inputs) + additional_inputs
|
|
124
|
+
|
|
125
|
+
for input_tuple in inputs:
|
|
126
|
+
input_operand = input_tuple[0]
|
|
127
|
+
input_type = input_tuple[1]
|
|
128
|
+
|
|
129
|
+
is_tuple = isinstance(input_operand, tuple)
|
|
130
|
+
if is_tuple:
|
|
131
|
+
args = input_operand
|
|
132
|
+
else:
|
|
133
|
+
args = (input_operand,) * ufunc.nin
|
|
134
|
+
|
|
135
|
+
if input_type in skip_inputs:
|
|
136
|
+
continue
|
|
137
|
+
if positive_only and np.any(args[0] < 0):
|
|
138
|
+
continue
|
|
139
|
+
|
|
140
|
+
# Some ufuncs don't allow all kinds of arguments
|
|
141
|
+
if args[0].dtype.kind not in kinds:
|
|
142
|
+
continue
|
|
143
|
+
|
|
144
|
+
output_type = self._determine_output_type(
|
|
145
|
+
input_type, int_output_type, float_output_type
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
input_types = (input_type,) * ufunc.nin
|
|
149
|
+
output_types = (output_type,) * ufunc.nout
|
|
150
|
+
argtys = input_types + output_types
|
|
151
|
+
cfunc = self._compile(pyfunc, argtys)
|
|
152
|
+
|
|
153
|
+
if isinstance(args[0], np.ndarray):
|
|
154
|
+
results = [
|
|
155
|
+
np.zeros(args[0].shape, dtype=out_ty.dtype.name)
|
|
156
|
+
for out_ty in output_types
|
|
157
|
+
]
|
|
158
|
+
expected = [
|
|
159
|
+
np.zeros(args[0].shape, dtype=out_ty.dtype.name)
|
|
160
|
+
for out_ty in output_types
|
|
161
|
+
]
|
|
162
|
+
else:
|
|
163
|
+
results = [
|
|
164
|
+
np.zeros(1, dtype=out_ty.dtype.name)
|
|
165
|
+
for out_ty in output_types
|
|
166
|
+
]
|
|
167
|
+
expected = [
|
|
168
|
+
np.zeros(1, dtype=out_ty.dtype.name)
|
|
169
|
+
for out_ty in output_types
|
|
170
|
+
]
|
|
171
|
+
|
|
172
|
+
invalid_flag = False
|
|
173
|
+
with warnings.catch_warnings(record=True) as warnlist:
|
|
174
|
+
warnings.simplefilter("always")
|
|
175
|
+
pyfunc(*args, *expected)
|
|
176
|
+
|
|
177
|
+
warnmsg = "invalid value encountered"
|
|
178
|
+
for thiswarn in warnlist:
|
|
179
|
+
if issubclass(thiswarn.category, RuntimeWarning) and str(
|
|
180
|
+
thiswarn.message
|
|
181
|
+
).startswith(warnmsg):
|
|
182
|
+
invalid_flag = True
|
|
183
|
+
|
|
184
|
+
cfunc(*args, *results)
|
|
185
|
+
|
|
186
|
+
for expected_i, result_i in zip(expected, results):
|
|
187
|
+
msg = "\n".join(
|
|
188
|
+
[
|
|
189
|
+
"ufunc '{0}' failed",
|
|
190
|
+
"inputs ({1}):",
|
|
191
|
+
"{2}",
|
|
192
|
+
"got({3})",
|
|
193
|
+
"{4}",
|
|
194
|
+
"expected ({5}):",
|
|
195
|
+
"{6}",
|
|
196
|
+
]
|
|
197
|
+
).format(
|
|
198
|
+
ufunc.__name__,
|
|
199
|
+
input_type,
|
|
200
|
+
input_operand,
|
|
201
|
+
output_type,
|
|
202
|
+
result_i,
|
|
203
|
+
expected_i.dtype,
|
|
204
|
+
expected_i,
|
|
205
|
+
)
|
|
206
|
+
try:
|
|
207
|
+
np.testing.assert_array_almost_equal(
|
|
208
|
+
expected_i, result_i, decimal=5, err_msg=msg
|
|
209
|
+
)
|
|
210
|
+
except AssertionError:
|
|
211
|
+
if invalid_flag:
|
|
212
|
+
# Allow output to mismatch for invalid input
|
|
213
|
+
print(
|
|
214
|
+
"Output mismatch for invalid input",
|
|
215
|
+
input_tuple,
|
|
216
|
+
result_i,
|
|
217
|
+
expected_i,
|
|
218
|
+
)
|
|
219
|
+
else:
|
|
220
|
+
raise
|
|
221
|
+
|
|
222
|
+
def signed_unsigned_cmp_test(self, comparison_ufunc):
|
|
223
|
+
self.basic_ufunc_test(comparison_ufunc)
|
|
224
|
+
|
|
225
|
+
if numpy_support.numpy_version < (1, 25):
|
|
226
|
+
return
|
|
227
|
+
|
|
228
|
+
# Test additional implementations that specifically handle signed /
|
|
229
|
+
# unsigned comparisons added in NumPy 1.25:
|
|
230
|
+
# https://github.com/numpy/numpy/pull/23713
|
|
231
|
+
additional_inputs = (
|
|
232
|
+
(np.int64(-1), np.uint64(0)),
|
|
233
|
+
(np.int64(-1), np.uint64(1)),
|
|
234
|
+
(np.int64(0), np.uint64(0)),
|
|
235
|
+
(np.int64(0), np.uint64(1)),
|
|
236
|
+
(np.int64(1), np.uint64(0)),
|
|
237
|
+
(np.int64(1), np.uint64(1)),
|
|
238
|
+
(np.uint64(0), np.int64(-1)),
|
|
239
|
+
(np.uint64(0), np.int64(0)),
|
|
240
|
+
(np.uint64(0), np.int64(1)),
|
|
241
|
+
(np.uint64(1), np.int64(-1)),
|
|
242
|
+
(np.uint64(1), np.int64(0)),
|
|
243
|
+
(np.uint64(1), np.int64(1)),
|
|
244
|
+
(
|
|
245
|
+
np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64),
|
|
246
|
+
np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64),
|
|
247
|
+
),
|
|
248
|
+
(
|
|
249
|
+
np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64),
|
|
250
|
+
np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64),
|
|
251
|
+
),
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
pyfunc = self._make_ufunc_usecase(comparison_ufunc)
|
|
255
|
+
|
|
256
|
+
for a, b in additional_inputs:
|
|
257
|
+
input_types = (typeof(a), typeof(b))
|
|
258
|
+
output_type = types.Array(types.bool_, 1, "C")
|
|
259
|
+
argtys = input_types + (output_type,)
|
|
260
|
+
cfunc = self._compile(pyfunc, argtys)
|
|
261
|
+
|
|
262
|
+
if isinstance(a, np.ndarray):
|
|
263
|
+
result = np.zeros(a.shape, dtype=np.bool_)
|
|
264
|
+
else:
|
|
265
|
+
result = np.zeros(1, dtype=np.bool_)
|
|
266
|
+
|
|
267
|
+
expected = np.zeros_like(result)
|
|
268
|
+
|
|
269
|
+
pyfunc(a, b, expected)
|
|
270
|
+
cfunc(a, b, result)
|
|
271
|
+
np.testing.assert_equal(expected, result)
|
|
8
272
|
|
|
9
273
|
|
|
10
274
|
def _make_ufunc_usecase(ufunc):
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba.cuda.testing import unittest, CUDATestCase
|
|
2
5
|
from numba import cuda
|
|
3
|
-
from numba.core import config
|
|
6
|
+
from numba.cuda.core import config
|
|
4
7
|
|
|
5
8
|
|
|
6
9
|
class MyError(Exception):
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
"""
|
|
2
5
|
CUDA vector type tests. Note that this test file imports
|
|
3
6
|
`cuda.vector_type` module to programmatically test all the
|
|
@@ -8,7 +11,7 @@ corresponding vector type from `cuda` module in kernel to use them.
|
|
|
8
11
|
|
|
9
12
|
import numpy as np
|
|
10
13
|
|
|
11
|
-
from numba.core import config
|
|
14
|
+
from numba.cuda.core import config
|
|
12
15
|
from numba.cuda.testing import CUDATestCase
|
|
13
16
|
|
|
14
17
|
from numba import cuda
|
|
@@ -1,15 +1,124 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
5
|
+
import math
|
|
2
6
|
|
|
3
|
-
from numba import vectorize, cuda
|
|
4
|
-
from numba.tests.npyufunc.test_vectorize_decor import (
|
|
5
|
-
BaseVectorizeDecor,
|
|
6
|
-
BaseVectorizeNopythonArg,
|
|
7
|
-
BaseVectorizeUnrecognizedArg,
|
|
8
|
-
)
|
|
7
|
+
from numba import vectorize, cuda, int32, uint32, float32, float64
|
|
9
8
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
9
|
+
from numba.cuda.tests.support import CheckWarningsMixin
|
|
10
|
+
|
|
10
11
|
import unittest
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
pi = math.pi
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def sinc(x):
|
|
18
|
+
if x == 0.0:
|
|
19
|
+
return 1.0
|
|
20
|
+
else:
|
|
21
|
+
return math.sin(x * pi) / (pi * x)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def scaled_sinc(x, scale):
|
|
25
|
+
if x == 0.0:
|
|
26
|
+
return scale
|
|
27
|
+
else:
|
|
28
|
+
return scale * (math.sin(x * pi) / (pi * x))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def vector_add(a, b):
|
|
32
|
+
return a + b
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BaseVectorizeDecor(object):
|
|
36
|
+
target = None
|
|
37
|
+
wrapper = None
|
|
38
|
+
funcs = {
|
|
39
|
+
"func1": sinc,
|
|
40
|
+
"func2": scaled_sinc,
|
|
41
|
+
"func3": vector_add,
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def _run_and_compare(cls, func, sig, A, *args, **kwargs):
|
|
46
|
+
if cls.wrapper is not None:
|
|
47
|
+
func = cls.wrapper(func)
|
|
48
|
+
numba_func = vectorize(sig, target=cls.target)(func)
|
|
49
|
+
numpy_func = np.vectorize(func)
|
|
50
|
+
result = numba_func(A, *args)
|
|
51
|
+
gold = numpy_func(A, *args)
|
|
52
|
+
np.testing.assert_allclose(result, gold, **kwargs)
|
|
53
|
+
|
|
54
|
+
def test_1(self):
|
|
55
|
+
sig = ["float64(float64)", "float32(float32)"]
|
|
56
|
+
func = self.funcs["func1"]
|
|
57
|
+
A = np.arange(100, dtype=np.float64)
|
|
58
|
+
self._run_and_compare(func, sig, A)
|
|
59
|
+
|
|
60
|
+
def test_2(self):
|
|
61
|
+
sig = [float64(float64), float32(float32)]
|
|
62
|
+
func = self.funcs["func1"]
|
|
63
|
+
A = np.arange(100, dtype=np.float64)
|
|
64
|
+
self._run_and_compare(func, sig, A)
|
|
65
|
+
|
|
66
|
+
def test_3(self):
|
|
67
|
+
sig = ["float64(float64, uint32)"]
|
|
68
|
+
func = self.funcs["func2"]
|
|
69
|
+
A = np.arange(100, dtype=np.float64)
|
|
70
|
+
scale = np.uint32(3)
|
|
71
|
+
self._run_and_compare(func, sig, A, scale, atol=1e-8)
|
|
72
|
+
|
|
73
|
+
def test_4(self):
|
|
74
|
+
sig = [
|
|
75
|
+
int32(int32, int32),
|
|
76
|
+
uint32(uint32, uint32),
|
|
77
|
+
float32(float32, float32),
|
|
78
|
+
float64(float64, float64),
|
|
79
|
+
]
|
|
80
|
+
func = self.funcs["func3"]
|
|
81
|
+
A = np.arange(100, dtype=np.float64)
|
|
82
|
+
self._run_and_compare(func, sig, A, A)
|
|
83
|
+
A = A.astype(np.float32)
|
|
84
|
+
self._run_and_compare(func, sig, A, A)
|
|
85
|
+
A = A.astype(np.int32)
|
|
86
|
+
self._run_and_compare(func, sig, A, A)
|
|
87
|
+
A = A.astype(np.uint32)
|
|
88
|
+
self._run_and_compare(func, sig, A, A)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class BaseVectorizeNopythonArg(unittest.TestCase, CheckWarningsMixin):
|
|
92
|
+
"""
|
|
93
|
+
Test passing the nopython argument to the vectorize decorator.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def _test_target_nopython(self, target, warnings, with_sig=True):
|
|
97
|
+
a = np.array([2.0], dtype=np.float32)
|
|
98
|
+
b = np.array([3.0], dtype=np.float32)
|
|
99
|
+
sig = [float32(float32, float32)]
|
|
100
|
+
args = with_sig and [sig] or []
|
|
101
|
+
with self.check_warnings(warnings):
|
|
102
|
+
f = vectorize(*args, target=target, nopython=True)(vector_add)
|
|
103
|
+
f(a, b)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class BaseVectorizeUnrecognizedArg(unittest.TestCase, CheckWarningsMixin):
|
|
107
|
+
"""
|
|
108
|
+
Test passing an unrecognized argument to the vectorize decorator.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
def _test_target_unrecognized_arg(self, target, with_sig=True):
|
|
112
|
+
a = np.array([2.0], dtype=np.float32)
|
|
113
|
+
b = np.array([3.0], dtype=np.float32)
|
|
114
|
+
sig = [float32(float32, float32)]
|
|
115
|
+
args = with_sig and [sig] or []
|
|
116
|
+
with self.assertRaises(KeyError) as raises:
|
|
117
|
+
f = vectorize(*args, target=target, nonexistent=2)(vector_add)
|
|
118
|
+
f(a, b)
|
|
119
|
+
self.assertIn("Unrecognized options", str(raises.exception))
|
|
120
|
+
|
|
121
|
+
|
|
13
122
|
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
|
14
123
|
class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
|
|
15
124
|
"""
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
from numba import cuda
|
|
3
6
|
from numba.cuda.cudadrv import driver
|
|
@@ -12,7 +15,7 @@ from numba.cuda.tests.support import (
|
|
|
12
15
|
run_in_subprocess,
|
|
13
16
|
)
|
|
14
17
|
from numba.core.errors import NumbaPerformanceWarning
|
|
15
|
-
from numba.core import config
|
|
18
|
+
from numba.cuda.core import config
|
|
16
19
|
import warnings
|
|
17
20
|
|
|
18
21
|
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import re
|
|
2
5
|
|
|
3
6
|
import numpy as np
|
|
4
7
|
from numba import cuda, int32, int64, float32, float64
|
|
5
8
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
6
9
|
from numba.cuda.compiler import compile_ptx
|
|
7
|
-
from numba.core import config
|
|
10
|
+
from numba.cuda.core import config
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
def useful_syncwarp(ary):
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
1
6
|
// Not all CUDA includes are safe to include in device code compiled by NVRTC,
|
|
2
7
|
// because it does not have paths to all system include directories. Headers
|
|
3
8
|
// such as cuda_device_runtime_api.h are safe to use in NVRTC without adding
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
1
6
|
extern "C" __device__
|
|
2
7
|
int bar(int* out, int a) {
|
|
3
8
|
// Explicitly placed to generate a warning for testing the NVRTC program log
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
1
6
|
// Templated multiplication function: mymul
|
|
2
7
|
template <typename T>
|
|
3
8
|
__device__ T mymul(T a, T b) { return a * b; }
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
+
*/
|
|
5
|
+
|
|
1
6
|
#include <add.cuh> // In numba/cuda/tests/data/include
|
|
2
7
|
#include <mul.cuh> // In numba/cuda/tests/doc_examples/ffi/include
|
|
3
8
|
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
# Contents in this file are referenced from the sphinx-generated docs.
|
|
2
5
|
# "magictoken" is used for markers as beginning and ending of example text.
|
|
3
6
|
|
|
@@ -7,13 +10,11 @@ from numba.cuda.testing import (
|
|
|
7
10
|
skip_on_cudasim,
|
|
8
11
|
skip_if_cudadevrt_missing,
|
|
9
12
|
skip_unless_cc_60,
|
|
10
|
-
skip_if_mvc_enabled,
|
|
11
13
|
)
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
@skip_if_cudadevrt_missing
|
|
15
17
|
@skip_unless_cc_60
|
|
16
|
-
@skip_if_mvc_enabled("CG not supported with MVC")
|
|
17
18
|
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
|
18
19
|
class TestCooperativeGroups(CUDATestCase):
|
|
19
20
|
def test_ex_grid_sync(self):
|