numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
# CUDA built-in Vector Types
|
|
5
|
+
# https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#built-in-vector-types
|
|
6
|
+
|
|
7
|
+
from typing import List, Tuple, Dict
|
|
8
|
+
|
|
9
|
+
from numba.cuda import types
|
|
10
|
+
from numba.cuda import cgutils
|
|
11
|
+
from numba.cuda.datamodel import models
|
|
12
|
+
from numba.cuda.core.imputils import Registry as ImplRegistry
|
|
13
|
+
from numba.cuda.typing.templates import ConcreteTemplate
|
|
14
|
+
from numba.cuda.typing.templates import Registry as TypingRegistry
|
|
15
|
+
from numba.cuda.typing.templates import signature
|
|
16
|
+
from numba.cuda import stubs
|
|
17
|
+
from numba.cuda.errors import CudaLoweringError
|
|
18
|
+
from numba.cuda.extending import make_attribute_wrapper, register_model
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Module-level registries: ``typing_registry`` is a typing-template Registry
# (numba.cuda.typing.templates) and ``impl_registry`` is an implementation
# Registry (numba.cuda.core.imputils).
typing_registry, impl_registry = TypingRegistry(), ImplRegistry()

# Short aliases for the bound registration methods of the registries above.
lower = impl_registry.lower
register = typing_registry.register
register_attr = typing_registry.register_attr
register_global = typing_registry.register_global
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class VectorType(types.Type):
    """Type object describing a vector type.

    Parameters
    ----------
    name: str
        Forwarded to the parent initializer as the type's name.
    base_type:
        Kept as-is and exposed through the ``base_type`` property.
    attr_names:
        Kept as-is and exposed through the ``attr_names`` property; its
        length is reported by ``num_elements``.
    user_facing_object:
        Kept as-is and exposed through the ``user_facing_object`` property.
    """

    def __init__(self, name, base_type, attr_names, user_facing_object):
        # Record the description of the vector first, then hand the name to
        # the parent initializer.
        self._user_facing_object = user_facing_object
        self._attr_names = attr_names
        self._base_type = base_type
        super().__init__(name=name)

    @property
    def user_facing_object(self):
        """The object passed as ``user_facing_object`` at construction."""
        return self._user_facing_object

    @property
    def attr_names(self):
        """The attribute names passed at construction."""
        return self._attr_names

    @property
    def num_elements(self):
        """Number of attribute names, i.e. ``len(attr_names)``."""
        return len(self._attr_names)

    @property
    def base_type(self):
        """The base type passed at construction."""
        return self._base_type
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def make_vector_type(
    name: str,
    base_type: types.Type,
    attr_names: Tuple[str, ...],
    user_facing_object,
) -> types.Type:
    """Build a new vector type and register its data model and attributes.

    A dedicated ``VectorType`` subclass is created on every call, a struct
    data model with one ``base_type`` member per attribute name is registered
    for that subclass, and an attribute wrapper is created for each name.

    Parameters
    ----------
    name: str
        Name given to the new type.
    base_type: numba.cuda.types.Type
        Primitive type shared by every element of the vector.
    attr_names: tuple of str
        One name per element; each becomes a struct member and an attribute.
    user_facing_object: object
        Handle that is used inside a cuda kernel.

    Returns
    -------
    The single instance of the freshly created vector type class.
    """

    class _VectorType(VectorType):
        """Internal instantiation of VectorType."""

    class VectorTypeModel(models.StructModel):
        """Struct model holding one ``base_type`` field per attribute name."""

        def __init__(self, dmm, fe_type):
            fields = [(field_name, base_type) for field_name in attr_names]
            super().__init__(dmm, fe_type, fields)

    instance = _VectorType(name, base_type, attr_names, user_facing_object)

    # Tie the struct model to the per-call type class.
    bind_model = register_model(_VectorType)
    bind_model(VectorTypeModel)

    # Expose every struct member as an attribute of the same name.
    for field_name in attr_names:
        make_attribute_wrapper(_VectorType, field_name, field_name)

    return instance
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def enable_vector_type_ctor(
    vector_type: VectorType, overloads: List[List[types.Type]]
):
    """Create typing and lowering for vector type constructor.

    The constructor is ``vector_type.user_facing_object``. One typing case
    and one lowering are registered per entry of ``overloads``.

    Parameters
    ----------
    vector_type: VectorType
        The type whose constructor to type and lower.
    overloads: List of argument types
        A list containing different overloads of the constructor. Each base type
        in the argument list should either be primitive type or VectorType.

    Raises
    ------
    CudaLoweringError
        Raised by a generated lowering (when it is invoked, not at
        registration) if the flattened arguments do not supply exactly
        ``vector_type.num_elements`` values.
    """
    ctor = vector_type.user_facing_object

    # Typing: a concrete template with one signature per overload.
    @register
    class CtorTemplate(ConcreteTemplate):
        key = ctor
        cases = [signature(vector_type, *arglist) for arglist in overloads]

    register_global(ctor, types.Function(CtorTemplate))

    # Lowering

    def make_lowering(fml_arg_list):
        """Meta function to create a lowering for the constructor. Flattens
        the arguments by converting vector_type into load instructions for each
        of its attributes. Such as float2 -> float2.x, float2.y.
        """

        def lowering(context, builder, sig, actual_args):
            # A list of elements to assign from
            source_list = []
            # Convert the list of argument types to a list of load IRs.
            for argidx, fml_arg in enumerate(fml_arg_list):
                if isinstance(fml_arg, VectorType):
                    # Vector argument: contribute each of its attributes.
                    pxy = cgutils.create_struct_proxy(fml_arg)(
                        context, builder, actual_args[argidx]
                    )
                    source_list.extend(
                        getattr(pxy, attr) for attr in fml_arg.attr_names
                    )
                else:
                    # assumed primitive type
                    source_list.append(actual_args[argidx])

            if len(source_list) != vector_type.num_elements:
                # Both halves must be f-strings; the second one previously
                # lacked the prefix and printed the placeholder verbatim.
                raise CudaLoweringError(
                    f"Unmatched number of source elements ({len(source_list)}) "
                    f"and target elements ({vector_type.num_elements})."
                )

            out = cgutils.create_struct_proxy(vector_type)(context, builder)

            # Assign the flattened sources to the target attributes in order.
            for attr_name, source in zip(vector_type.attr_names, source_list):
                setattr(out, attr_name, source)
            return out._getvalue()

        return lowering

    # Register one lowering per overload, keyed on its argument types.
    for arglist in overloads:
        lowering = make_lowering(arglist)
        lower(ctor, *arglist)(lowering)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Module-level registry of the generated vector types, keyed by the type name
# (the ``__name__`` of the stub each type was built from). It is filled in by
# ``_initialize()`` and read by ``build_constructor_overloads()``.
vector_types: Dict[str, VectorType] = {}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def build_constructor_overloads(base_type, vty_name, num_elements, arglists, l):
    """
    Enumerate the constructor overloads of one vector type.

    Recursively splits ``num_elements`` into components of width 1..n. A
    width-1 component may be either the scalar ``base_type`` or the 1-element
    vector type; wider components are the vector type of that width, looked up
    in ``vector_types`` under the name ``vty_name[:-1]`` + width.

    Parameters
    ----------
    base_type
        Scalar element type of the vector type.
    vty_name: str
        Name of the vector type; its last character is replaced by the
        component width to find sibling types.
    num_elements: int
        Number of elements that still have to be covered.
    arglists: list
        Output list; a copy of ``l`` is appended for every complete overload.
    l: list
        Scratch list holding the partial argument list. It is mutated during
        the recursion and restored before returning.
    """

    # TODO: speed up with memoization
    if num_elements == 0:
        # Every element is covered: snapshot the current argument list.
        arglists.append(l[:])
        return

    family = vty_name[:-1]
    for width in range(1, num_elements + 1):
        remaining = num_elements - width

        if width == 1:
            # A single element may be supplied as a plain scalar ...
            l.append(base_type)
            build_constructor_overloads(
                base_type, vty_name, remaining, arglists, l
            )
            l.pop(-1)

        # ... and any width (including 1) as the vector type of that width.
        l.append(vector_types[f"{family}{width}"])
        build_constructor_overloads(base_type, vty_name, remaining, arglists, l)
        l.pop(-1)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _initialize():
    """
    Build every vector type and wire up its constructor.

    First pass: create one vector type per stub in
    ``stubs._vector_type_stubs`` and store it in ``vector_types`` under the
    stub's name. Second pass: compute the constructor overloads of each
    registered type and enable typing/lowering for them. The second pass runs
    only after all types exist because overloads refer to sibling types.
    """
    component_names = ("x", "y", "z", "w")

    for stub in stubs._vector_type_stubs:
        name = stub.__name__
        # The last character of the name is the element count; the name minus
        # its last two characters is the attribute looked up on ``types``.
        count = int(name[-1])
        scalar_type = getattr(types, name[:-2])
        vector_types[name] = make_vector_type(
            name, scalar_type, component_names[:count], stub
        )

    for vty in vector_types.values():
        overloads = []
        build_constructor_overloads(
            vty.base_type, vty.name, vty.num_elements, overloads, []
        )
        enable_vector_type_ctor(vty, overloads)


_initialize()
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba import cuda
|
|
5
|
+
from numpy import array as np_array
|
|
6
|
+
from numba.cuda import deviceufunc
|
|
7
|
+
from numba.cuda.deviceufunc import (
|
|
8
|
+
UFuncMechanism,
|
|
9
|
+
GeneralizedUFunc,
|
|
10
|
+
GUFuncCallSteps,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CUDAUFuncDispatcher(object):
    """
    Callable wrapper that dispatches a CUDA ufunc to the kernel matching the
    argument types, and offers a pairwise ``reduce`` for binary ufuncs.
    """

    def __init__(self, types_to_retty_kernels, pyfunc):
        # Mapping handed straight to CUDAUFuncMechanism.call on every call.
        self.functions = types_to_retty_kernels
        self.__name__ = pyfunc.__name__

    def __call__(self, *args, **kws):
        """
        *args: numpy arrays or DeviceArrayBase (created by cuda.to_device).
               Cannot mix the two types in one call.

        **kws:
            stream -- cuda stream; when defined, asynchronous mode is used.
            out    -- output array. Can be a numpy array or DeviceArrayBase
                      depending on the input arguments.  Type must match
                      the input arguments.
        """
        return CUDAUFuncMechanism.call(self.functions, args, kws)

    def reduce(self, arg, stream=0):
        """
        Reduce a 1-d array to a single element with this binary ufunc.

        Raises ``TypeError`` for an empty array; returns ``arg[0]`` directly
        for a single-element array. Otherwise the data is moved to the device
        if it is not already a CUDA ndarray, reduced there, and the result is
        copied back into a one-element host buffer.
        """
        assert len(list(self.functions.keys())[0]) == 2, (
            "must be a binary ufunc"
        )
        assert arg.ndim == 1, "must use 1d array"

        length = arg.shape[0]
        if length == 0:
            raise TypeError("Reduction on an empty array.")
        if length == 1:  # nothing to do
            return arg[0]

        # Keeps references to intermediate device views for the whole
        # reduction (see __reduce).
        keepalive = []

        # always use a stream
        stream = stream or cuda.stream()
        with stream.auto_synchronize():
            # transfer memory to device if necessary
            on_device = cuda.cudadrv.devicearray.is_cuda_ndarray(arg)
            device_mem = arg if on_device else cuda.to_device(arg, stream)
            # do reduction
            reduced = self.__reduce(device_mem, keepalive, stream)
            # use a small buffer to store the result element
            host_buf = np_array((1,), dtype=arg.dtype)
            reduced.copy_to_host(host_buf, stream=stream)

        # Read the result only after the stream context has exited.
        return host_buf[0]

    def __reduce(self, mem, gpu_mems, stream):
        """
        Recursively combine ``mem`` in place and return the device array
        holding the result. Every split view is appended to ``gpu_mems`` to
        prevent freeing during async mode.
        """
        size = mem.shape[0]

        if size % 2 == 0:  # even: fold the right half onto the left half
            half = size // 2
            lhs, rhs = mem.split(half)
            gpu_mems.append(lhs)
            gpu_mems.append(rhs)
            # execute the kernel
            self(lhs, rhs, out=lhs, stream=stream)
            if half > 1:
                return self.__reduce(lhs, gpu_mems, stream)
            return lhs

        # odd: reduce the first size - 1 elements, then fold in the last one
        body, tail = mem.split(size - 1)
        gpu_mems.append(body)
        gpu_mems.append(tail)
        # execute the kernel
        partial = self.__reduce(body, gpu_mems, stream)
        gpu_mems.append(partial)
        return self(partial, tail, out=partial, stream=stream)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _CUDAGUFuncCallSteps(GUFuncCallSteps):
    """CUDA implementation of the gufunc call steps; every transfer,
    allocation and launch uses the stream taken from the call's kwargs."""

    __slots__ = [
        "_stream",
    ]

    def __init__(self, nin, nout, args, kwargs):
        super().__init__(nin, nout, args, kwargs)
        # 0 is used when the caller passed no ``stream`` keyword.
        self._stream = kwargs.get("stream", 0)

    def is_device_array(self, obj):
        """Return whether ``cuda.is_cuda_array`` accepts ``obj``."""
        return cuda.is_cuda_array(obj)

    def as_device_array(self, obj):
        """Return ``obj`` as a device array."""
        # We don't want to call as_cuda_array on objects that are already Numba
        # device arrays, because this results in exporting the array as a
        # Producer then importing it as a Consumer, which causes a
        # synchronization on the array's stream (if it has one) by default.
        # When we have a Numba device array, we can simply return it.
        already_numba_array = cuda.cudadrv.devicearray.is_cuda_ndarray(obj)
        return obj if already_numba_array else cuda.as_cuda_array(obj)

    def to_device(self, hostary):
        """Copy ``hostary`` to the device on this call's stream."""
        return cuda.to_device(hostary, stream=self._stream)

    def to_host(self, devary, hostary):
        """Copy ``devary`` into ``hostary`` on this call's stream and return
        whatever ``copy_to_host`` returns."""
        return devary.copy_to_host(hostary, stream=self._stream)

    def allocate_device_array(self, shape, dtype):
        """Allocate a device array of ``shape``/``dtype`` on this call's
        stream."""
        return cuda.device_array(shape=shape, dtype=dtype, stream=self._stream)

    def launch_kernel(self, kernel, nelem, args):
        """Launch ``kernel`` over ``nelem`` elements via ``forall``."""
        configured = kernel.forall(nelem, stream=self._stream)
        configured(*args)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class CUDAGeneralizedUFunc(GeneralizedUFunc):
    """Generalized ufunc for the CUDA target; broadcasting is done by building
    ``DeviceNDArray`` views with zero strides over the same device data."""

    def __init__(self, kernelmap, engine, pyfunc):
        # Expose the wrapped Python function's name on the ufunc object.
        self.__name__ = pyfunc.__name__
        super().__init__(kernelmap, engine)

    @property
    def _call_steps(self):
        """The call-steps class to use for this target."""
        return _CUDAGUFuncCallSteps

    def _broadcast_scalar_input(self, ary, shape):
        """Return a view of ``ary`` with ``shape`` and a single zero stride,
        sharing ``ary``'s device data."""
        view_kwargs = dict(
            shape=shape,
            strides=(0,),
            dtype=ary.dtype,
            gpu_data=ary.gpu_data,
        )
        return cuda.cudadrv.devicearray.DeviceNDArray(**view_kwargs)

    def _broadcast_add_axis(self, ary, newshape):
        """Return a view of ``ary`` with shape ``newshape`` in which every
        added leading dimension has stride 0."""
        added_dims = len(newshape) - len(ary.shape)
        # Add 0 strides for missing dimension
        padded_strides = (0,) * added_dims + ary.strides
        return cuda.cudadrv.devicearray.DeviceNDArray(
            shape=newshape,
            strides=padded_strides,
            dtype=ary.dtype,
            gpu_data=ary.gpu_data,
        )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class CUDAUFuncMechanism(UFuncMechanism):
    """
    Provide CUDA specialization
    """

    DEFAULT_STREAM = 0

    def launch(self, func, count, stream, args):
        """Launch ``func`` over ``count`` elements on ``stream`` via
        ``forall``."""
        func.forall(count, stream=stream)(*args)

    def is_device_array(self, obj):
        """Return whether ``cuda.is_cuda_array`` accepts ``obj``."""
        return cuda.is_cuda_array(obj)

    def as_device_array(self, obj):
        """Return ``obj`` as a device array."""
        # We don't want to call as_cuda_array on objects that are already Numba
        # device arrays, because this results in exporting the array as a
        # Producer then importing it as a Consumer, which causes a
        # synchronization on the array's stream (if it has one) by default.
        # When we have a Numba device array, we can simply return it.
        if cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
            return obj
        return cuda.as_cuda_array(obj)

    def to_device(self, hostary, stream):
        """Copy ``hostary`` to the device on ``stream``."""
        return cuda.to_device(hostary, stream=stream)

    def to_host(self, devary, stream):
        """Copy ``devary`` back to the host on ``stream``."""
        return devary.copy_to_host(stream=stream)

    def allocate_device_array(self, shape, dtype, stream):
        """Allocate a device array of ``shape``/``dtype`` on ``stream``."""
        return cuda.device_array(shape=shape, dtype=dtype, stream=stream)

    def broadcast_device(self, ary, shape):
        """Return a view of ``ary`` broadcast to ``shape``.

        The view shares ``ary``'s device data. Dimensions that ``ary`` lacks
        are prepended with stride 0, and every dimension whose extent differs
        from the target extent also gets stride 0.

        Parameters
        ----------
        ary
            Device array to broadcast; its ``shape``, ``strides``, ``dtype``
            and ``gpu_data`` are read.
        shape
            Target shape. Assumed to be a valid broadcast of ``ary.shape``
            (trailing axes aligned); this is not checked here.
        """
        missingdim = len(shape) - len(ary.shape)
        strides = [0] * missingdim + list(ary.strides)

        # The strides above are right-aligned with ``shape``, so the extents
        # must be compared with the same alignment: target axis ``ax``
        # corresponds to source axis ``ax - missingdim``. Comparing
        # ``ary.shape[ax]`` directly would zero the strides of the array's
        # real axes whenever leading dimensions are added.
        for ax in range(len(shape)):
            src_ax = ax - missingdim
            if src_ax < 0 or ary.shape[src_ax] != shape[ax]:
                strides[ax] = 0

        return cuda.cudadrv.devicearray.DeviceNDArray(
            shape=shape, strides=strides, dtype=ary.dtype, gpu_data=ary.gpu_data
        )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Source template for the element-wise kernel that wraps a compiled core
# function. ``{name}``, ``{args}`` and ``{argitems}`` are filled in with
# ``str.format``; ``__cuda__`` and ``__core__`` must be present in the globals
# the formatted source is executed with. The indentation inside the string is
# part of the generated program and must be kept.
vectorizer_stager_source = """
def __vectorized_{name}({args}, __out__):
    __tid__ = __cuda__.grid(1)
    if __tid__ < __out__.shape[0]:
        __out__[__tid__] = __core__({argitems})
"""
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class CUDAVectorize(deviceufunc.DeviceVectorize):
    """CUDA hooks for building an element-wise ufunc from a Python
    function."""

    def _compile_core(self, sig):
        """Compile the Python function as an always-inlined device function
        and return it together with the return type of the overload compiled
        for ``sig.args``."""
        device_fn = cuda.jit(sig, device=True, inline="always")(self.pyfunc)
        return_type = device_fn.overloads[sig.args].signature.return_type
        return device_fn, return_type

    def _get_globals(self, corefn):
        """Return a copy of the Python function's globals extended with the
        ``__cuda__`` and ``__core__`` names used by the kernel template."""
        namespace = self.pyfunc.__globals__.copy()
        namespace["__cuda__"] = cuda
        namespace["__core__"] = corefn
        return namespace

    def _compile_kernel(self, fnobj, sig):
        """Wrap ``fnobj`` with ``cuda.jit``; ``sig`` is not used here."""
        return cuda.jit(fnobj)

    def build_ufunc(self):
        """Return the dispatcher wrapping the compiled kernel map."""
        return CUDAUFuncDispatcher(self.kernelmap, self.pyfunc)

    @property
    def _kernel_template(self):
        """Source template of the element-wise kernel."""
        return vectorizer_stager_source
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# ------------------------------------------------------------------------------
|
|
232
|
+
# Generalized CUDA ufuncs
|
|
233
|
+
|
|
234
|
+
# Source template for the generalized-ufunc kernel. ``{name}``, ``{args}``,
# ``{checkedarg}`` and ``{argitems}`` are filled in with ``str.format``;
# ``__cuda__`` and ``__core__`` must be present in the globals the formatted
# source is executed with. The indentation inside the string is part of the
# generated program and must be kept.
_gufunc_stager_source = """
def __gufunc_{name}({args}):
    __tid__ = __cuda__.grid(1)
    if __tid__ < {checkedarg}:
        __core__({argitems})
"""
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class CUDAGUFuncVectorize(deviceufunc.DeviceGUFuncVectorize):
    """CUDA hooks for building a generalized ufunc from a Python function."""

    def build_ufunc(self):
        """Create the gufunc engine from the input/output signatures and
        return the CUDA generalized ufunc built on it."""
        gufunc_engine = deviceufunc.GUFuncEngine(self.inputsig, self.outputsig)
        return CUDAGeneralizedUFunc(
            kernelmap=self.kernelmap,
            engine=gufunc_engine,
            pyfunc=self.pyfunc,
        )

    def _compile_kernel(self, fnobj, sig):
        """Compile ``fnobj`` as a CUDA kernel with the explicit signature
        ``sig``."""
        compile_with_sig = cuda.jit(sig)
        return compile_with_sig(fnobj)

    @property
    def _kernel_template(self):
        """Source template of the generalized-ufunc kernel."""
        return _gufunc_stager_source

    def _get_globals(self, sig):
        """Compile the Python function as a device function for ``sig`` and
        return a copy of its globals extended with the ``__cuda__`` and
        ``__core__`` names used by the kernel template."""
        device_core = cuda.jit(sig, device=True)(self.pyfunc)
        namespace = self.py_func.__globals__.copy()
        namespace["__cuda__"] = cuda
        namespace["__core__"] = device_core
        return namespace
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: numba-cuda
|
|
3
|
+
Version: 0.21.1
|
|
4
|
+
Summary: CUDA target for Numba
|
|
5
|
+
Author: Anaconda Inc., NVIDIA Corporation
|
|
6
|
+
License-Expression: BSD-2-Clause
|
|
7
|
+
Project-URL: Homepage, https://nvidia.github.io/numba-cuda/
|
|
8
|
+
Project-URL: Documentation, https://nvidia.github.io/numba-cuda/
|
|
9
|
+
Project-URL: Repository, https://github.com/NVIDIA/numba-cuda
|
|
10
|
+
Project-URL: License, https://github.com/NVIDIA/numba-cuda/blob/main/LICENSE
|
|
11
|
+
Project-URL: Issues, https://github.com/NVIDIA/numba-cuda/issues
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
License-File: LICENSE.numba
|
|
16
|
+
Requires-Dist: numba>=0.60.0
|
|
17
|
+
Requires-Dist: cuda-bindings<14.0.0,>=12.9.1
|
|
18
|
+
Requires-Dist: cuda-core<1.0.0,>=0.3.2
|
|
19
|
+
Provides-Extra: cu12
|
|
20
|
+
Requires-Dist: cuda-bindings<13.0.0,>=12.9.1; extra == "cu12"
|
|
21
|
+
Requires-Dist: cuda-core<1.0.0,>=0.3.0; extra == "cu12"
|
|
22
|
+
Requires-Dist: cuda-python==12.9.*; extra == "cu12"
|
|
23
|
+
Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
|
|
24
|
+
Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
|
|
25
|
+
Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
|
|
26
|
+
Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
|
|
27
|
+
Requires-Dist: nvidia-cuda-cccl-cu12; extra == "cu12"
|
|
28
|
+
Provides-Extra: cu13
|
|
29
|
+
Requires-Dist: cuda-bindings==13.*; extra == "cu13"
|
|
30
|
+
Requires-Dist: cuda-core<1.0.0,>=0.3.2; extra == "cu13"
|
|
31
|
+
Requires-Dist: cuda-python==13.*; extra == "cu13"
|
|
32
|
+
Requires-Dist: nvidia-nvvm==13.*; extra == "cu13"
|
|
33
|
+
Requires-Dist: nvidia-cuda-runtime==13.*; extra == "cu13"
|
|
34
|
+
Requires-Dist: nvidia-cuda-nvrtc==13.*; extra == "cu13"
|
|
35
|
+
Requires-Dist: nvidia-nvjitlink==13.*; extra == "cu13"
|
|
36
|
+
Requires-Dist: nvidia-cuda-cccl==13.*; extra == "cu13"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|
|
40
|
+
|
|
41
|
+
# Numba CUDA Target
|
|
42
|
+
|
|
43
|
+
The CUDA target for Numba. Please visit the [official
|
|
44
|
+
documentation](https://nvidia.github.io/numba-cuda) to get started!
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
To report issues or file feature requests, please use the [issue
|
|
48
|
+
tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
|
49
|
+
|
|
50
|
+
To raise questions or initiate discussions, please use the [Numba Discourse
|
|
51
|
+
forum](https://numba.discourse.group).
|
|
52
|
+
|
|
53
|
+
## Installation with pip or conda
|
|
54
|
+
|
|
55
|
+
Please refer to the [Installation documentation](https://nvidia.github.io/numba-cuda/user/installation.html#installation-with-a-python-package-manager).
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
## Installation from source
|
|
59
|
+
|
|
60
|
+
Install as an editable install:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
pip install -e .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
If you want to manage all run-time dependencies yourself, also pass the `--no-deps` flag.
|
|
67
|
+
|
|
68
|
+
## Running tests
|
|
69
|
+
|
|
70
|
+
Tests must be run from the `testing` folder, which contains the pytest
|
|
71
|
+
configuration and code to generate binaries used during the tests. The test
|
|
72
|
+
binaries need to be built on the system on which the tests are run, so that
|
|
73
|
+
they are compiled for the appropriate compute capability.
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
cd testing
|
|
77
|
+
# Optionally, build test binaries and point to their location for the test suite
|
|
78
|
+
make -j $(nproc)
|
|
79
|
+
export NUMBA_CUDA_TEST_BIN_DIR=`pwd`
|
|
80
|
+
# Execute tests
|
|
81
|
+
pytest -n auto -v --dist loadscope
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Alternatively, you can use [pixi](https://pixi.sh/latest/installation/) to wrap all of that up for you:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
# run tests against CUDA 13
|
|
88
|
+
pixi run -e cu13 test -n auto -v --dist loadscope
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
Testing should discover the `numba.cuda` module from the `numba_cuda` package. You
|
|
93
|
+
can check where the `numba.cuda` files are located by running:
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
python -c "from numba import cuda; print(cuda.__file__)"
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
which will show a path like:
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
<path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Contributing Guide
|
|
106
|
+
|
|
107
|
+
Review the
|
|
108
|
+
[CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
|
|
109
|
+
file for information on how to contribute code and issues to the project.
|