numba-cuda 0.18.1__py3-none-any.whl → 0.19.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic; the original listing links to further details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +5 -2
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +4 -1
- numba_cuda/numba/cuda/api.py +5 -7
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +3 -0
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +3 -0
- numba_cuda/numba/cuda/codegen.py +3 -0
- numba_cuda/numba/cuda/compiler.py +10 -4
- numba_cuda/numba/cuda/core/caching.py +3 -0
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +3 -0
- numba_cuda/numba/cuda/core/interpreter.py +3595 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2644 -0
- numba_cuda/numba/cuda/core/sigutils.py +58 -0
- numba_cuda/numba/cuda/core/typed_passes.py +3 -0
- numba_cuda/numba/cuda/cuda_paths.py +12 -17
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +7 -19
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +4 -2
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +3 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +47 -44
- numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -18
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +15 -1
- numba_cuda/numba/cuda/cudaimpl.py +3 -0
- numba_cuda/numba/cuda/cudamath.py +4 -1
- numba_cuda/numba/cuda/debuginfo.py +3 -0
- numba_cuda/numba/cuda/decorators.py +7 -3
- numba_cuda/numba/cuda/descriptor.py +3 -0
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +5 -1
- numba_cuda/numba/cuda/dispatcher.py +6 -2
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +4 -1
- numba_cuda/numba/cuda/flags.py +2 -0
- numba_cuda/numba/cuda/fp16.py +3 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +4 -1
- numba_cuda/numba/cuda/libdevicefuncs.py +4 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +53 -16
- numba_cuda/numba/cuda/mathimpl.py +3 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +5 -1
- numba_cuda/numba/cuda/models.py +3 -0
- numba_cuda/numba/cuda/nvvmutils.py +3 -0
- numba_cuda/numba/cuda/printimpl.py +3 -0
- numba_cuda/numba/cuda/random.py +3 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +3 -0
- numba_cuda/numba/cuda/simulator/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -7
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +3 -0
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +4 -2
- numba_cuda/numba/cuda/testing.py +7 -6
- numba_cuda/numba/cuda/tests/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +7 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +149 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -4
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +23 -284
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +298 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +8 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +4 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +4 -1
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +4 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
- numba_cuda/numba/cuda/tests/support.py +755 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +6 -3
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +6 -2
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/types.py +3 -0
- numba_cuda/numba/cuda/typing/__init__.py +11 -0
- numba_cuda/numba/cuda/typing/templates.py +1448 -0
- numba_cuda/numba/cuda/ufuncs.py +3 -0
- numba_cuda/numba/cuda/utils.py +3 -0
- numba_cuda/numba/cuda/vector_types.py +6 -3
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/METADATA +25 -29
- numba_cuda-0.19.1.dist-info/RECORD +302 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.19.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
- numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
- numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
- numba_cuda-0.18.1.dist-info/RECORD +0 -296
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import multiprocessing as mp
|
|
2
5
|
import itertools
|
|
3
6
|
import traceback
|
|
@@ -14,7 +17,7 @@ from numba.cuda.testing import (
|
|
|
14
17
|
ContextResettingTestCase,
|
|
15
18
|
ForeignArray,
|
|
16
19
|
)
|
|
17
|
-
from numba.tests.support import linux_only, windows_only
|
|
20
|
+
from numba.cuda.tests.support import linux_only, windows_only
|
|
18
21
|
import unittest
|
|
19
22
|
|
|
20
23
|
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.testing import CUDATestCase
|
|
5
|
+
import numba
|
|
6
|
+
from numba.core.registry import cpu_target
|
|
7
|
+
from numba.core.compiler import CompilerBase, Flags
|
|
8
|
+
from numba.core.compiler_machinery import PassManager
|
|
9
|
+
from numba.cuda.core import ir_utils
|
|
10
|
+
from numba.core import types, ir, bytecode, compiler, registry
|
|
11
|
+
from numba.core.untyped_passes import (
|
|
12
|
+
ExtractByteCode,
|
|
13
|
+
TranslateByteCode,
|
|
14
|
+
FixupArgs,
|
|
15
|
+
IRProcessing,
|
|
16
|
+
)
|
|
17
|
+
from numba.experimental import jitclass
|
|
18
|
+
from numba.core.typed_passes import (
|
|
19
|
+
NopythonTypeInference,
|
|
20
|
+
type_inference_stage,
|
|
21
|
+
DeadCodeElimination,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# global constant for testing find_const
|
|
26
|
+
GLOBAL_B = 11
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@jitclass([("val", numba.core.types.List(numba.intp))])
|
|
30
|
+
class Dummy(object):
|
|
31
|
+
def __init__(self, val):
|
|
32
|
+
self.val = val
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TestIrUtils(CUDATestCase):
|
|
36
|
+
"""
|
|
37
|
+
Tests ir handling utility functions like find_callname.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def test_obj_func_match(self):
|
|
41
|
+
"""Test matching of an object method (other than Array see #3449)"""
|
|
42
|
+
|
|
43
|
+
def test_func():
|
|
44
|
+
d = Dummy([1])
|
|
45
|
+
d.val.append(2)
|
|
46
|
+
|
|
47
|
+
test_ir = compiler.run_frontend(test_func)
|
|
48
|
+
typingctx = cpu_target.typing_context
|
|
49
|
+
targetctx = cpu_target.target_context
|
|
50
|
+
typing_res = type_inference_stage(
|
|
51
|
+
typingctx, targetctx, test_ir, (), None
|
|
52
|
+
)
|
|
53
|
+
matched_call = ir_utils.find_callname(
|
|
54
|
+
test_ir, test_ir.blocks[0].body[7].value, typing_res.typemap
|
|
55
|
+
)
|
|
56
|
+
self.assertTrue(
|
|
57
|
+
isinstance(matched_call, tuple)
|
|
58
|
+
and len(matched_call) == 2
|
|
59
|
+
and matched_call[0] == "append"
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
def test_dead_code_elimination(self):
|
|
63
|
+
class Tester(CompilerBase):
|
|
64
|
+
@classmethod
|
|
65
|
+
def mk_pipeline(
|
|
66
|
+
cls,
|
|
67
|
+
args,
|
|
68
|
+
return_type=None,
|
|
69
|
+
flags=None,
|
|
70
|
+
locals=None,
|
|
71
|
+
library=None,
|
|
72
|
+
typing_context=None,
|
|
73
|
+
target_context=None,
|
|
74
|
+
):
|
|
75
|
+
if locals is None:
|
|
76
|
+
locals = {}
|
|
77
|
+
if not flags:
|
|
78
|
+
flags = Flags()
|
|
79
|
+
flags.nrt = True
|
|
80
|
+
if typing_context is None:
|
|
81
|
+
typing_context = registry.cpu_target.typing_context
|
|
82
|
+
if target_context is None:
|
|
83
|
+
target_context = registry.cpu_target.target_context
|
|
84
|
+
return cls(
|
|
85
|
+
typing_context,
|
|
86
|
+
target_context,
|
|
87
|
+
library,
|
|
88
|
+
args,
|
|
89
|
+
return_type,
|
|
90
|
+
flags,
|
|
91
|
+
locals,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
def compile_to_ir(self, func, DCE=False):
|
|
95
|
+
"""
|
|
96
|
+
Compile and return IR
|
|
97
|
+
"""
|
|
98
|
+
func_id = bytecode.FunctionIdentity.from_function(func)
|
|
99
|
+
self.state.func_id = func_id
|
|
100
|
+
ExtractByteCode().run_pass(self.state)
|
|
101
|
+
state = self.state
|
|
102
|
+
|
|
103
|
+
name = "DCE_testing"
|
|
104
|
+
pm = PassManager(name)
|
|
105
|
+
pm.add_pass(TranslateByteCode, "analyzing bytecode")
|
|
106
|
+
pm.add_pass(FixupArgs, "fix up args")
|
|
107
|
+
pm.add_pass(IRProcessing, "processing IR")
|
|
108
|
+
pm.add_pass(NopythonTypeInference, "nopython frontend")
|
|
109
|
+
if DCE is True:
|
|
110
|
+
pm.add_pass(DeadCodeElimination, "DCE after typing")
|
|
111
|
+
pm.finalize()
|
|
112
|
+
pm.run(state)
|
|
113
|
+
return state.func_ir
|
|
114
|
+
|
|
115
|
+
def check_initial_ir(the_ir):
|
|
116
|
+
# dead stuff:
|
|
117
|
+
# a const int value 0xdead
|
|
118
|
+
# an assign of above into to variable `dead`
|
|
119
|
+
# a const int above 0xdeaddead
|
|
120
|
+
# an assign of said int to variable `deaddead`
|
|
121
|
+
# this is 2 statements to remove
|
|
122
|
+
|
|
123
|
+
self.assertEqual(len(the_ir.blocks), 1)
|
|
124
|
+
block = the_ir.blocks[0]
|
|
125
|
+
deads = []
|
|
126
|
+
for x in block.find_insts(ir.Assign):
|
|
127
|
+
if isinstance(getattr(x, "target", None), ir.Var):
|
|
128
|
+
if "dead" in getattr(x.target, "name", ""):
|
|
129
|
+
deads.append(x)
|
|
130
|
+
|
|
131
|
+
self.assertEqual(len(deads), 2)
|
|
132
|
+
for d in deads:
|
|
133
|
+
# check the ir.Const is the definition and the value is expected
|
|
134
|
+
const_val = the_ir.get_definition(d.value)
|
|
135
|
+
self.assertTrue(
|
|
136
|
+
int("0x%s" % d.target.name, 16), const_val.value
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
return deads
|
|
140
|
+
|
|
141
|
+
def check_dce_ir(the_ir):
|
|
142
|
+
self.assertEqual(len(the_ir.blocks), 1)
|
|
143
|
+
block = the_ir.blocks[0]
|
|
144
|
+
deads = []
|
|
145
|
+
consts = []
|
|
146
|
+
for x in block.find_insts(ir.Assign):
|
|
147
|
+
if isinstance(getattr(x, "target", None), ir.Var):
|
|
148
|
+
if "dead" in getattr(x.target, "name", ""):
|
|
149
|
+
deads.append(x)
|
|
150
|
+
if isinstance(getattr(x, "value", None), ir.Const):
|
|
151
|
+
consts.append(x)
|
|
152
|
+
self.assertEqual(len(deads), 0)
|
|
153
|
+
|
|
154
|
+
# check the consts to make sure there's no reference to 0xdead or
|
|
155
|
+
# 0xdeaddead
|
|
156
|
+
for x in consts:
|
|
157
|
+
self.assertTrue(x.value.value not in [0xDEAD, 0xDEADDEAD])
|
|
158
|
+
|
|
159
|
+
def foo(x):
|
|
160
|
+
y = x + 1
|
|
161
|
+
dead = 0xDEAD # noqa
|
|
162
|
+
z = y + 2
|
|
163
|
+
deaddead = 0xDEADDEAD # noqa
|
|
164
|
+
ret = z * z
|
|
165
|
+
return ret
|
|
166
|
+
|
|
167
|
+
test_pipeline = Tester.mk_pipeline((types.intp,))
|
|
168
|
+
no_dce = test_pipeline.compile_to_ir(foo)
|
|
169
|
+
removed = check_initial_ir(no_dce)
|
|
170
|
+
|
|
171
|
+
test_pipeline = Tester.mk_pipeline((types.intp,))
|
|
172
|
+
w_dce = test_pipeline.compile_to_ir(foo, DCE=True)
|
|
173
|
+
check_dce_ir(w_dce)
|
|
174
|
+
|
|
175
|
+
# check that the count of initial - removed = dce
|
|
176
|
+
self.assertEqual(
|
|
177
|
+
len(no_dce.blocks[0].body) - len(removed), len(w_dce.blocks[0].body)
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
def test_find_const_global(self):
|
|
181
|
+
"""
|
|
182
|
+
Test find_const() for values in globals (ir.Global) and freevars
|
|
183
|
+
(ir.FreeVar) that are considered constants for compilation.
|
|
184
|
+
"""
|
|
185
|
+
FREEVAR_C = 12
|
|
186
|
+
|
|
187
|
+
def foo(a):
|
|
188
|
+
b = GLOBAL_B
|
|
189
|
+
c = FREEVAR_C
|
|
190
|
+
return a + b + c
|
|
191
|
+
|
|
192
|
+
f_ir = compiler.run_frontend(foo)
|
|
193
|
+
block = f_ir.blocks[0]
|
|
194
|
+
const_b = None
|
|
195
|
+
const_c = None
|
|
196
|
+
|
|
197
|
+
for inst in block.body:
|
|
198
|
+
if isinstance(inst, ir.Assign) and inst.target.name == "b":
|
|
199
|
+
const_b = ir_utils.guard(ir_utils.find_const, f_ir, inst.target)
|
|
200
|
+
if isinstance(inst, ir.Assign) and inst.target.name == "c":
|
|
201
|
+
const_c = ir_utils.guard(ir_utils.find_const, f_ir, inst.target)
|
|
202
|
+
|
|
203
|
+
self.assertEqual(const_b, GLOBAL_B)
|
|
204
|
+
self.assertEqual(const_c, FREEVAR_C)
|
|
205
|
+
|
|
206
|
+
def test_flatten_labels(self):
|
|
207
|
+
"""tests flatten_labels"""
|
|
208
|
+
|
|
209
|
+
def foo(a):
|
|
210
|
+
acc = 0
|
|
211
|
+
if a > 3:
|
|
212
|
+
acc += 1
|
|
213
|
+
if a > 19:
|
|
214
|
+
return 53
|
|
215
|
+
elif a < 1000:
|
|
216
|
+
if a >= 12:
|
|
217
|
+
acc += 1
|
|
218
|
+
for x in range(10):
|
|
219
|
+
acc -= 1
|
|
220
|
+
if acc < 2:
|
|
221
|
+
break
|
|
222
|
+
else:
|
|
223
|
+
acc += 7
|
|
224
|
+
else:
|
|
225
|
+
raise ValueError("some string")
|
|
226
|
+
# prevents inline of return on py310
|
|
227
|
+
py310_defeat1 = 1 # noqa
|
|
228
|
+
py310_defeat2 = 2 # noqa
|
|
229
|
+
py310_defeat3 = 3 # noqa
|
|
230
|
+
py310_defeat4 = 4 # noqa
|
|
231
|
+
return acc
|
|
232
|
+
|
|
233
|
+
def bar(a):
|
|
234
|
+
acc = 0
|
|
235
|
+
z = 12
|
|
236
|
+
if a > 3:
|
|
237
|
+
acc += 1
|
|
238
|
+
z += 12
|
|
239
|
+
if a > 19:
|
|
240
|
+
z += 12
|
|
241
|
+
return 53
|
|
242
|
+
elif a < 1000:
|
|
243
|
+
if a >= 12:
|
|
244
|
+
z += 12
|
|
245
|
+
acc += 1
|
|
246
|
+
for x in range(10):
|
|
247
|
+
z += 12
|
|
248
|
+
acc -= 1
|
|
249
|
+
if acc < 2:
|
|
250
|
+
break
|
|
251
|
+
else:
|
|
252
|
+
z += 12
|
|
253
|
+
acc += 7
|
|
254
|
+
else:
|
|
255
|
+
raise ValueError("some string")
|
|
256
|
+
py310_defeat1 = 1 # noqa
|
|
257
|
+
py310_defeat2 = 2 # noqa
|
|
258
|
+
py310_defeat3 = 3 # noqa
|
|
259
|
+
py310_defeat4 = 4 # noqa
|
|
260
|
+
return acc
|
|
261
|
+
|
|
262
|
+
def baz(a):
|
|
263
|
+
acc = 0
|
|
264
|
+
if a > 3:
|
|
265
|
+
acc += 1
|
|
266
|
+
if a > 19:
|
|
267
|
+
return 53
|
|
268
|
+
else: # extra control flow in comparison to foo
|
|
269
|
+
return 55
|
|
270
|
+
elif a < 1000:
|
|
271
|
+
if a >= 12:
|
|
272
|
+
acc += 1
|
|
273
|
+
for x in range(10):
|
|
274
|
+
acc -= 1
|
|
275
|
+
if acc < 2:
|
|
276
|
+
break
|
|
277
|
+
else:
|
|
278
|
+
acc += 7
|
|
279
|
+
else:
|
|
280
|
+
raise ValueError("some string")
|
|
281
|
+
py310_defeat1 = 1 # noqa
|
|
282
|
+
py310_defeat2 = 2 # noqa
|
|
283
|
+
py310_defeat3 = 3 # noqa
|
|
284
|
+
py310_defeat4 = 4 # noqa
|
|
285
|
+
return acc
|
|
286
|
+
|
|
287
|
+
def get_flat_cfg(func):
|
|
288
|
+
func_ir = ir_utils.compile_to_numba_ir(func, dict())
|
|
289
|
+
flat_blocks = ir_utils.flatten_labels(func_ir.blocks)
|
|
290
|
+
self.assertEqual(max(flat_blocks.keys()) + 1, len(func_ir.blocks))
|
|
291
|
+
return ir_utils.compute_cfg_from_blocks(flat_blocks)
|
|
292
|
+
|
|
293
|
+
foo_cfg = get_flat_cfg(foo)
|
|
294
|
+
bar_cfg = get_flat_cfg(bar)
|
|
295
|
+
baz_cfg = get_flat_cfg(baz)
|
|
296
|
+
|
|
297
|
+
self.assertEqual(foo_cfg, bar_cfg)
|
|
298
|
+
self.assertNotEqual(foo_cfg, baz_cfg)
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba import cuda, float32, int32
|
|
2
5
|
from numba.core.errors import NumbaInvalidConfigWarning
|
|
3
6
|
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
4
|
-
from numba.tests.support import ignore_internal_warnings
|
|
7
|
+
from numba.cuda.tests.support import ignore_internal_warnings
|
|
5
8
|
import re
|
|
6
9
|
import unittest
|
|
7
10
|
import warnings
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba import float64, uint32
|
|
2
5
|
from numba.cuda.compiler import compile_ptx
|
|
3
6
|
from numba.cuda.testing import skip_on_cudasim, unittest
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
from numba.cuda.testing import (
|
|
3
6
|
unittest,
|
|
@@ -9,7 +12,7 @@ from numba.cuda.testing import (
|
|
|
9
12
|
from numba import cuda
|
|
10
13
|
from numba.core import types
|
|
11
14
|
from numba.core.types import f2, b1
|
|
12
|
-
from numba.core.typing import signature
|
|
15
|
+
from numba.cuda.typing import signature
|
|
13
16
|
import operator
|
|
14
17
|
import itertools
|
|
15
18
|
from numba.np.numpy_support import from_dtype
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba import cuda, njit, types, version_info
|
|
2
5
|
from numba.core.errors import TypingError
|
|
3
6
|
from numba.core.extending import overload, overload_attribute
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from numba import cuda
|
|
2
5
|
from numba.core.errors import TypingError
|
|
3
6
|
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
from numba import cuda, int32, float32
|
|
3
6
|
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import functools
|
|
2
5
|
import numpy as np
|
|
3
6
|
import unittest
|
|
4
7
|
|
|
5
8
|
from numba import config, cuda, types
|
|
6
|
-
from numba.tests.support import TestCase
|
|
9
|
+
from numba.cuda.tests.support import TestCase
|
|
7
10
|
from numba.tests.test_ufuncs import BasicUFuncTest
|
|
8
11
|
|
|
9
12
|
|