numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from warnings import warn
|
|
5
|
+
from numba.cuda import types
|
|
6
|
+
from numba.cuda.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
|
7
|
+
from numba.cuda.compiler import declare_device_function
|
|
8
|
+
from numba.cuda.core import sigutils, config
|
|
9
|
+
from numba.cuda.dispatcher import CUDADispatcher
|
|
10
|
+
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
|
11
|
+
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
_msg_deprecated_signature_arg = (
|
|
15
|
+
"Deprecated keyword argument `{0}`. "
|
|
16
|
+
"Signatures should be passed as the first "
|
|
17
|
+
"positional argument."
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def jit(
    func_or_sig=None,
    device=False,
    inline="never",
    forceinline=False,
    link=[],
    debug=None,
    opt=None,
    lineinfo=False,
    cache=False,
    launch_bounds=None,
    lto=None,
    **kws,
):
    """
    JIT compile a Python function for CUDA GPUs.

    :param func_or_sig: A function to JIT compile, or *signatures* of a
       function to compile. If a function is supplied, then a
       :class:`Dispatcher <numba.cuda.dispatcher.CUDADispatcher>` is returned.
       Otherwise, ``func_or_sig`` may be a signature or a list of signatures,
       and a function is returned. The returned function accepts another
       function, which it will compile and then return a :class:`Dispatcher
       <numba.cuda.dispatcher.CUDADispatcher>`. See :ref:`jit-decorator` for
       more information about passing signatures.

       .. note:: A kernel cannot have any return value.
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param inline: Enables inlining at the Numba IR level when set to
       ``"always"``. See `Notes on Inlining
       <https://numba.readthedocs.io/en/stable/developer/inlining.html>`_.
       Passing a bool is deprecated: it emits a ``DeprecationWarning`` and
       is mapped to ``"always"`` (True) or ``"never"`` (False).
    :type inline: str
    :param forceinline: Enables inlining at the NVVM IR level when set to
       ``True``. This is accomplished by adding the ``alwaysinline`` function
       attribute to the function definition.
    :type forceinline: bool
    :param link: A list of files containing PTX or CUDA C/C++ source to link
       with the function
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
       kernel. Since this degrades performance, this should only be used for
       debugging purposes. If set to True, then ``opt`` should be set to False.
       Defaults to False.  (The default value can be overridden by setting
       environment variable ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: When True, enables fastmath optimizations as outlined in
       the :ref:`CUDA Fast Math documentation <cuda-fast-math>`.
    :param max_registers: Request that the kernel is limited to using at most
       this number of registers per thread. The limit may not be respected if
       the ABI requires a greater number of registers than that requested.
       Useful for increasing occupancy.
    :param opt: Whether to compile with optimization enabled. If unspecified,
       the OPT configuration variable is decided by ``NUMBA_OPT``; all
       non-zero values will enable optimization.
    :type opt: bool
    :param lineinfo: If True, generate a line mapping between source code and
       assembly code. This enables inspection of the source code in NVIDIA
       profiling tools and correlation with program counter sampling.
    :type lineinfo: bool
    :param cache: If True, enables the file-based cache for this function.
    :type cache: bool
    :param launch_bounds: Kernel launch bounds, specified as a scalar or a tuple
                          of between one and three items. Tuple items provide:

                          - The maximum number of threads per block,
                          - The minimum number of blocks per SM,
                          - The maximum number of blocks per cluster.

                          If a scalar is provided, it is used as the maximum
                          number of threads per block.
    :type launch_bounds: int | tuple[int]
    :param lto: Whether to enable LTO. If unspecified, LTO is enabled by
                default when nvjitlink is available, except for kernels where
                ``debug=True``.
    :type lto: bool
    :raises NotImplementedError: If ``link`` is given while the simulator is
       enabled, or if a truthy ``boundscheck`` keyword is passed.
    :raises DeprecationError: If any of the removed ``argtypes``, ``restype``
       or ``bind`` keyword arguments is passed with a non-None value.
    :raises RuntimeError: If ``lto`` is explicitly true but nvjitlink is not
       available.
    :raises TypeError: When compiling with explicit signatures, if a
       non-device function is given a non-void return type.
    """

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError("Cannot link PTX in the simulator")

    if kws.get("boundscheck"):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    # Removed keyword arguments are rejected outright, checked in this order.
    for removed_kw in ("argtypes", "restype", "bind"):
        if kws.get(removed_kw) is not None:
            msg = _msg_deprecated_signature_arg.format(removed_kw)
            raise DeprecationError(msg)

    if isinstance(inline, bool):
        # The warning must actually be emitted; merely constructing the
        # exception object has no effect. stacklevel=2 attributes it to the
        # caller of jit().
        warn(
            DeprecationWarning(
                "Passing bool to inline argument is deprecated, please refer to "
                "Numba's documentation on inlining: "
                "https://numba.readthedocs.io/en/stable/developer/inlining.html. "
                "You may have wanted the forceinline argument instead, to force "
                "inlining at the NVVM IR level."
            ),
            stacklevel=2,
        )

        inline = "always" if inline else "never"

    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
    opt = (config.OPT != 0) if opt is None else opt
    fastmath = kws.get("fastmath", False)
    extensions = kws.get("extensions", [])

    if debug and opt:
        msg = (
            "debug=True with opt=True "
            "is not supported by CUDA. This may result in a crash"
            " - set debug=False or opt=False."
        )
        warn(NumbaInvalidConfigWarning(msg))

    if debug and lineinfo:
        msg = (
            "debug and lineinfo are mutually exclusive. Use debug to get "
            "full debug info (this disables some optimizations), or "
            "lineinfo for line info only with code generation unaffected."
        )
        warn(NumbaInvalidConfigWarning(msg))

    # NOTE(review): ``link`` is a named parameter of this function, so it can
    # never appear in ``kws`` and this check cannot fire as written. Left
    # unchanged because switching it to the ``link`` parameter would start
    # rejecting calls that are accepted today - confirm the intended policy.
    if device and kws.get("link"):
        raise ValueError("link keyword invalid for device function")

    if lto is None:
        # Default to using LTO if nvjitlink is available and we're not debugging
        lto = _have_nvjitlink() and not debug
    else:
        if lto and not _have_nvjitlink():
            raise RuntimeError(
                "LTO requires nvjitlink, which is not available "
                "or not sufficiently recent (>=12.3)"
            )

    def _make_dispatcher(pyfunc):
        # Build a CUDADispatcher for pyfunc from the resolved options; the
        # explicit arguments override any same-named entries in kws.
        targetoptions = kws.copy()
        targetoptions["debug"] = debug
        targetoptions["lineinfo"] = lineinfo
        targetoptions["link"] = link
        targetoptions["opt"] = opt
        targetoptions["fastmath"] = fastmath
        targetoptions["device"] = device
        targetoptions["inline"] = inline
        targetoptions["forceinline"] = forceinline
        targetoptions["extensions"] = extensions
        targetoptions["launch_bounds"] = launch_bounds
        targetoptions["lto"] = lto

        disp = CUDADispatcher(pyfunc, targetoptions=targetoptions)

        if cache:
            disp.enable_caching()

        return disp

    if sigutils.is_signature(func_or_sig):
        signatures = [func_or_sig]
        specialized = True
    elif isinstance(func_or_sig, list):
        signatures = func_or_sig
        specialized = False
    else:
        signatures = None

    if signatures is not None:
        if config.ENABLE_CUDASIM:

            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath)

            return jitwrapper

        def _jit(func):
            disp = _make_dispatcher(func)

            # Eagerly compile every requested signature.
            for sig in signatures:
                argtypes, restype = sigutils.normalize_signature(sig)

                if restype and not device and restype != types.void:
                    raise TypeError("CUDA kernel must have void return type.")

                if device:
                    from numba.cuda.core import typeinfer

                    with typeinfer.register_dispatcher(disp):
                        disp.compile_device(argtypes, restype)
                else:
                    disp.compile(argtypes)

            disp._specialized = specialized
            disp.disable_compile()

            return disp

        return _jit
    else:
        if func_or_sig is None:
            # Called with options only, e.g. ``@jit(device=True)``: return a
            # decorator that applies the same options to the function.
            if config.ENABLE_CUDASIM:

                def autojitwrapper(func):
                    return FakeCUDAKernel(
                        func, device=device, fastmath=fastmath
                    )
            else:

                def autojitwrapper(func):
                    return jit(
                        func,
                        device=device,
                        inline=inline,
                        forceinline=forceinline,
                        debug=debug,
                        opt=opt,
                        lineinfo=lineinfo,
                        link=link,
                        cache=cache,
                        launch_bounds=launch_bounds,
                        # Forward the resolved value; omitting it would
                        # discard an explicit lto=True/False from the caller.
                        lto=lto,
                        **kws,
                    )

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(
                    func_or_sig, device=device, fastmath=fastmath
                )
            else:
                return _make_dispatcher(func_or_sig)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def declare_device(name, sig, link=None, use_cooperative=False):
    """
    Declare the signature of a foreign function so that it can be called from
    a Python kernel.

    :param name: The name of the foreign function.
    :type name: str
    :param sig: The Numba signature of the function. It must include a return
       type.
    :param link: External code to link when calling the function. A value
       that is not a list, tuple or set is wrapped in a one-element tuple;
       ``None`` means nothing is linked.
    :param use_cooperative: External code requires cooperative launch.
    :return: The ``key`` of the template created by
       ``declare_device_function`` for this declaration.
    :raises TypeError: If the normalized signature has no return type.
    """
    # Normalize ``link`` to a collection before handing it on.
    if link is None:
        link = tuple()
    elif not isinstance(link, (list, tuple, set)):
        link = (link,)

    argtypes, restype = sigutils.normalize_signature(sig)
    if restype is None:
        raise TypeError("Return type must be provided for device declarations")

    return declare_device_function(
        name, restype, argtypes, link, use_cooperative
    ).key
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.core.options import TargetOptions
|
|
5
|
+
from .target import CUDATargetContext, CUDATypingContext
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CUDATargetOptions(TargetOptions):
    """Target options class for CUDA; adds nothing to ``TargetOptions``."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CUDATarget:
    """
    Holds the options class and the lazily created typing and target
    contexts for a named target.

    :param name: The target name, stored as ``_target_name``.
    """

    def __init__(self, name):
        self.options = CUDATargetOptions
        # The typing and target contexts are initialized only when needed -
        # this prevents an attempt to load CUDA libraries at import time on
        # systems that might not have them present.
        self._typingctx = None
        self._targetctx = None
        self._target_name = name

    @property
    def typing_context(self):
        """The ``CUDATypingContext``, created on first access and cached."""
        if self._typingctx is None:
            self._typingctx = CUDATypingContext()
        return self._typingctx

    @property
    def target_context(self):
        """The ``CUDATargetContext``, created on first access and cached."""
        if self._targetctx is None:
            # Go through the property rather than the raw attribute so the
            # typing context exists even when target_context is accessed
            # first; otherwise None would be passed to CUDATargetContext.
            self._targetctx = CUDATargetContext(self.typing_context)
        return self._targetctx
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Module-level CUDATarget instance for the target named "cuda". Constructing
# it creates no contexts; those are built on first property access.
cuda_target = CUDATarget("cuda")
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
# Re-exports
|
|
5
|
+
import sys
|
|
6
|
+
from numba.cuda import cg
|
|
7
|
+
from numba.cuda.cache_hints import (
|
|
8
|
+
ldca,
|
|
9
|
+
ldcg,
|
|
10
|
+
ldcs,
|
|
11
|
+
ldlu,
|
|
12
|
+
ldcv,
|
|
13
|
+
stcg,
|
|
14
|
+
stcs,
|
|
15
|
+
stwb,
|
|
16
|
+
stwt,
|
|
17
|
+
)
|
|
18
|
+
from .stubs import (
|
|
19
|
+
threadIdx,
|
|
20
|
+
blockIdx,
|
|
21
|
+
blockDim,
|
|
22
|
+
gridDim,
|
|
23
|
+
laneid,
|
|
24
|
+
warpsize,
|
|
25
|
+
syncwarp,
|
|
26
|
+
shared,
|
|
27
|
+
local,
|
|
28
|
+
const,
|
|
29
|
+
atomic,
|
|
30
|
+
vote_sync_intrinsic,
|
|
31
|
+
match_any_sync,
|
|
32
|
+
match_all_sync,
|
|
33
|
+
threadfence_block,
|
|
34
|
+
threadfence_system,
|
|
35
|
+
threadfence,
|
|
36
|
+
selp,
|
|
37
|
+
popc,
|
|
38
|
+
brev,
|
|
39
|
+
clz,
|
|
40
|
+
ffs,
|
|
41
|
+
fma,
|
|
42
|
+
cbrt,
|
|
43
|
+
activemask,
|
|
44
|
+
lanemask_lt,
|
|
45
|
+
nanosleep,
|
|
46
|
+
_vector_type_stubs,
|
|
47
|
+
)
|
|
48
|
+
from .intrinsics import (
|
|
49
|
+
grid,
|
|
50
|
+
gridsize,
|
|
51
|
+
syncthreads,
|
|
52
|
+
syncthreads_and,
|
|
53
|
+
syncthreads_count,
|
|
54
|
+
syncthreads_or,
|
|
55
|
+
shfl_sync,
|
|
56
|
+
shfl_up_sync,
|
|
57
|
+
shfl_down_sync,
|
|
58
|
+
shfl_xor_sync,
|
|
59
|
+
)
|
|
60
|
+
from .cudadrv.error import CudaSupportError
|
|
61
|
+
from numba.cuda.cudadrv.driver import (
|
|
62
|
+
BaseCUDAMemoryManager,
|
|
63
|
+
HostOnlyCUDAMemoryManager,
|
|
64
|
+
GetIpcHandleMixin,
|
|
65
|
+
MemoryPointer,
|
|
66
|
+
MappedMemory,
|
|
67
|
+
PinnedMemory,
|
|
68
|
+
MemoryInfo,
|
|
69
|
+
IpcHandle,
|
|
70
|
+
set_memory_manager,
|
|
71
|
+
)
|
|
72
|
+
from numba.cuda.cudadrv.runtime import runtime
|
|
73
|
+
from .cudadrv import nvvm
|
|
74
|
+
from numba.cuda import initialize
|
|
75
|
+
from .errors import KernelRuntimeError
|
|
76
|
+
|
|
77
|
+
from .decorators import jit, declare_device
|
|
78
|
+
from .api import *
|
|
79
|
+
from .api import _auto_device
|
|
80
|
+
from .args import In, Out, InOut
|
|
81
|
+
|
|
82
|
+
from .intrinsic_wrapper import (
|
|
83
|
+
all_sync,
|
|
84
|
+
any_sync,
|
|
85
|
+
eq_sync,
|
|
86
|
+
ballot_sync,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
from .kernels import reduction
|
|
90
|
+
from numba.cuda.cudadrv.linkable_code import (
|
|
91
|
+
Archive,
|
|
92
|
+
CUSource,
|
|
93
|
+
Cubin,
|
|
94
|
+
Fatbin,
|
|
95
|
+
LinkableCode,
|
|
96
|
+
LTOIR,
|
|
97
|
+
Object,
|
|
98
|
+
PTXSource,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
from numba.cuda.misc.special import literal_unroll
|
|
102
|
+
from numba.cuda.misc import literal
|
|
103
|
+
|
|
104
|
+
# ``Reduce`` and its lowercase alias ``reduce`` both name reduction.Reduce.
Reduce = reduction.Reduce
reduce = Reduce

# Expose vector type constructors and aliases as module level attributes:
# each stub is published under its own ``__name__`` and under every name in
# its ``aliases``.
_this_module = sys.modules[__name__]
for vector_type_stub in _vector_type_stubs:
    setattr(_this_module, vector_type_stub.__name__, vector_type_stub)
    for alias in vector_type_stub.aliases:
        setattr(_this_module, alias, vector_type_stub)
# Remove the loop helpers from the module namespace. The inner loop variable
# ``alias`` is intentionally left untouched here, as before.
del vector_type_stub, _vector_type_stubs, _this_module
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def is_available():
    """Report whether a CUDA GPU can be used.

    Accessing the driver here will initialize it if it hasn't been
    initialized. The result is truthy only when the driver reports itself
    available and ``nvvm.is_available()`` is also true.
    """
    # Driver initialization may raise; `cuda.is_available` is often used as a
    # guard for whether to run a CUDA test, so a CudaSupportError must not
    # escape and break test discovery/orchestration. It is reported as
    # "not available" instead.
    try:
        driver_ok = driver.driver.is_available
    except CudaSupportError:
        return False

    return driver_ok and nvvm.is_available()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def is_supported_version():
    """Return whether the CUDA Runtime is a supported version.

    This delegates to ``runtime.is_supported_version()``.

    An unsupported version (e.g. one newer than those known to Numba) may
    still work. The check only tells the caller whether the current Numba
    version is tested and known to work with the current runtime version, so
    the caller can decide how to react to an unsupported one, for example by:

    - Continuing silently,
    - Emitting a warning,
    - Generating an error or otherwise preventing the use of CUDA.
    """
    supported = runtime.is_supported_version()
    return supported
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def cuda_error():
    """Return the CUDA driver's initialization error, if any.

    The value is read from ``driver.driver.initialization_error``: None when
    the driver initialized without error, otherwise a string describing the
    error.
    """
    init_error = driver.driver.initialization_error
    return init_error
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Runs at import time, after every definition above has been made.
# NOTE(review): what initialize_all() sets up is defined in
# numba.cuda.initialize - confirm there before reordering this call.
initialize.initialize_all()
|