numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
// SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <vector>
#include <limits.h>

#include "typeconv.hpp"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
// ------ TCCMap ------
|
|
13
|
+
|
|
14
|
+
// Build an empty map: the record counter starts at zero.
TCCMap::TCCMap() : nb_records(0) {}
|
|
18
|
+
|
|
19
|
+
size_t TCCMap::hash(const TypePair &key) const {
|
|
20
|
+
return std::hash<size_t>()(std::hash<Type>()(key.first)) ^
|
|
21
|
+
std::hash<Type>()(key.second);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
void TCCMap::insert(const TypePair &key, TypeCompatibleCode val) {
|
|
25
|
+
size_t i = hash(key) & (TCCMAP_SIZE - 1);
|
|
26
|
+
TCCMapBin &bin = records[i];
|
|
27
|
+
for (unsigned int j = 0; j < bin.size(); ++j) {
|
|
28
|
+
if (bin[j].key == key) {
|
|
29
|
+
bin[j].val = val;
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
bin.push_back({key, val});
|
|
34
|
+
nb_records++;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Look up the compatibility code recorded for `key`.
//
// Returns the stored value, or TCC_FALSE when no record matches.
TypeCompatibleCode TCCMap::find(const TypePair &key) const {
    // Same bin selection as insert(): mask the hash with (TCCMAP_SIZE - 1).
    const TCCMapBin &bucket = records[hash(key) & (TCCMAP_SIZE - 1)];

    for (const auto &record : bucket) {
        if (record.key == key)
            return record.val;
    }
    return TCC_FALSE;
}
|
|
47
|
+
|
|
48
|
+
// ----- Ratings -----
|
|
49
|
+
// A fresh rating carries no penalties: all three counters start at zero.
Rating::Rating()
    : promote(0),
      safe_convert(0),
      unsafe_convert(0)
{
}
|
|
50
|
+
|
|
51
|
+
// Lexicographic "less than" over (unsafe_convert, safe_convert, promote):
// the first counter that differs decides, and promote is the final
// tie-breaker.
inline bool Rating::operator < (const Rating &other) const {
    if (unsafe_convert != other.unsafe_convert)
        return unsafe_convert < other.unsafe_convert;
    if (safe_convert != other.safe_convert)
        return safe_convert < other.safe_convert;
    return promote < other.promote;
}
|
|
62
|
+
|
|
63
|
+
// Two ratings are equal only when all three counters match.
inline bool Rating::operator == (const Rating &other) const {
    if (promote != other.promote)
        return false;
    if (safe_convert != other.safe_convert)
        return false;
    return unsafe_convert == other.unsafe_convert;
}
|
|
67
|
+
|
|
68
|
+
// ------ TypeManager ------
|
|
69
|
+
|
|
70
|
+
// True when the compatibility of `from` -> `to` is exactly TCC_PROMOTE.
bool TypeManager::canPromote(Type from, Type to) const {
    const TypeCompatibleCode code = isCompatible(from, to);
    return code == TCC_PROMOTE;
}
|
|
73
|
+
|
|
74
|
+
// True when the compatibility of `from` -> `to` is exactly TCC_CONVERT_SAFE.
bool TypeManager::canSafeConvert(Type from, Type to) const {
    const TypeCompatibleCode code = isCompatible(from, to);
    return code == TCC_CONVERT_SAFE;
}
|
|
77
|
+
|
|
78
|
+
// True when the compatibility of `from` -> `to` is exactly
// TCC_CONVERT_UNSAFE.
bool TypeManager::canUnsafeConvert(Type from, Type to) const {
    const TypeCompatibleCode code = isCompatible(from, to);
    return code == TCC_CONVERT_UNSAFE;
}
|
|
81
|
+
|
|
82
|
+
// Record `from` -> `to` with the TCC_PROMOTE compatibility code.
void TypeManager::addPromotion(Type from, Type to) {
    addCompatibility(from, to, TCC_PROMOTE);
}
|
|
85
|
+
|
|
86
|
+
// Record `from` -> `to` with the TCC_CONVERT_UNSAFE compatibility code.
void TypeManager::addUnsafeConversion(Type from, Type to) {
    addCompatibility(from, to, TCC_CONVERT_UNSAFE);
}
|
|
89
|
+
|
|
90
|
+
// Record `from` -> `to` with the TCC_CONVERT_SAFE compatibility code.
void TypeManager::addSafeConversion(Type from, Type to) {
    addCompatibility(from, to, TCC_CONVERT_SAFE);
}
|
|
93
|
+
|
|
94
|
+
// Store compatibility code `tcc` for the ordered pair (from, to) in the
// map; TCCMap::insert overwrites any value already stored for that pair.
void TypeManager::addCompatibility(Type from, Type to, TypeCompatibleCode tcc) {
    tccmap.insert(TypePair(from, to), tcc);
}
|
|
98
|
+
|
|
99
|
+
// Return how `from` can be used where `to` is expected.
//
// Identical types yield TCC_EXACT without a lookup; otherwise the result is
// whatever the map holds for (from, to), which is TCC_FALSE when no record
// exists.
TypeCompatibleCode TypeManager::isCompatible(Type from, Type to) const {
    if (from == to) {
        return TCC_EXACT;
    }
    return tccmap.find(TypePair(from, to));
}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
int TypeManager::selectOverload(const Type sig[], const Type ovsigs[],
|
|
108
|
+
int &selected,
|
|
109
|
+
int sigsz, int ovct, bool allow_unsafe,
|
|
110
|
+
bool exact_match_required
|
|
111
|
+
) const {
|
|
112
|
+
int count;
|
|
113
|
+
if (ovct <= 16) {
|
|
114
|
+
Rating ratings[16];
|
|
115
|
+
int candidates[16];
|
|
116
|
+
count = _selectOverload(sig, ovsigs, selected, sigsz, ovct,
|
|
117
|
+
allow_unsafe, exact_match_required, ratings,
|
|
118
|
+
candidates);
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
Rating *ratings = new Rating[ovct];
|
|
122
|
+
int *candidates = new int[ovct];
|
|
123
|
+
count = _selectOverload(sig, ovsigs, selected, sigsz, ovct,
|
|
124
|
+
allow_unsafe, exact_match_required, ratings,
|
|
125
|
+
candidates);
|
|
126
|
+
delete [] ratings;
|
|
127
|
+
delete [] candidates;
|
|
128
|
+
}
|
|
129
|
+
return count;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
int TypeManager::_selectOverload(const Type sig[], const Type ovsigs[],
|
|
133
|
+
int &selected, int sigsz, int ovct,
|
|
134
|
+
bool allow_unsafe, bool exact_match_required,
|
|
135
|
+
Rating ratings[], int candidates[]) const {
|
|
136
|
+
// Generate rating table
|
|
137
|
+
// Use a penalize scheme.
|
|
138
|
+
int nb_candidates = 0;
|
|
139
|
+
|
|
140
|
+
for (int i = 0; i < ovct; ++i) {
|
|
141
|
+
const Type *entry = &ovsigs[i * sigsz];
|
|
142
|
+
Rating rate;
|
|
143
|
+
|
|
144
|
+
for (int j = 0; j < sigsz; ++j) {
|
|
145
|
+
TypeCompatibleCode tcc = isCompatible(sig[j], entry[j]);
|
|
146
|
+
if (tcc == TCC_FALSE ||
|
|
147
|
+
(tcc == TCC_CONVERT_UNSAFE && !allow_unsafe) ||
|
|
148
|
+
(tcc != TCC_EXACT && exact_match_required)) {
|
|
149
|
+
// stop the loop early
|
|
150
|
+
goto _incompatible;
|
|
151
|
+
}
|
|
152
|
+
switch(tcc) {
|
|
153
|
+
case TCC_PROMOTE:
|
|
154
|
+
rate.promote += 1;
|
|
155
|
+
break;
|
|
156
|
+
case TCC_CONVERT_SAFE:
|
|
157
|
+
rate.safe_convert += 1;
|
|
158
|
+
break;
|
|
159
|
+
case TCC_CONVERT_UNSAFE:
|
|
160
|
+
rate.unsafe_convert += 1;
|
|
161
|
+
break;
|
|
162
|
+
default:
|
|
163
|
+
break;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
ratings[nb_candidates] = rate;
|
|
167
|
+
candidates[nb_candidates] = i;
|
|
168
|
+
nb_candidates++;
|
|
169
|
+
_incompatible:
|
|
170
|
+
;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Bail if no match
|
|
174
|
+
if (nb_candidates == 0)
|
|
175
|
+
return 0;
|
|
176
|
+
|
|
177
|
+
// Find lowest rating
|
|
178
|
+
Rating best = ratings[0];
|
|
179
|
+
selected = candidates[0];
|
|
180
|
+
|
|
181
|
+
int matchcount = 1;
|
|
182
|
+
for (int i = 1; i < nb_candidates; ++i) {
|
|
183
|
+
if (ratings[i] < best) {
|
|
184
|
+
best = ratings[i];
|
|
185
|
+
selected = candidates[i];
|
|
186
|
+
matchcount = 1;
|
|
187
|
+
}
|
|
188
|
+
else if (ratings[i] == best) {
|
|
189
|
+
matchcount += 1;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
return matchcount;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// ----- utils -----
|
|
196
|
+
|
|
197
|
+
// Map a compatibility code to a short, static, human-readable label.
// TCC_FALSE and any value not listed below map to "false". The returned
// pointer refers to a string literal and must not be freed.
const char* TCCString(TypeCompatibleCode tcc) {
    const char *label = "false";

    switch (tcc) {
    case TCC_EXACT:
        label = "exact";
        break;
    case TCC_SUBTYPE:
        label = "subtype";
        break;
    case TCC_PROMOTE:
        label = "promote";
        break;
    case TCC_CONVERT_SAFE:
        label = "safe_convert";
        break;
    case TCC_CONVERT_UNSAFE:
        label = "unsafe_convert";
        break;
    default:
        break;
    }

    return label;
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
// SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
#ifndef NUMBA_TYPECONV_HPP_
|
|
5
|
+
#define NUMBA_TYPECONV_HPP_
|
|
6
|
+
#include <string>
|
|
7
|
+
#include <vector>
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
// A type is identified by a plain integer.
typedef int Type;

// Classification of a conversion from one Type to another. The numeric
// values are spelled out because TCC_FALSE must stay 0 and the rest follow
// in declaration order.
enum TypeCompatibleCode {
    // No match
    TCC_FALSE = 0,
    // Exact match
    TCC_EXACT = 1,
    // Subtype is UNUSED
    TCC_SUBTYPE = 2,
    // Promotion with no precision loss
    TCC_PROMOTE = 3,
    // Conversion with no precision loss,
    // e.g. int32 to double
    TCC_CONVERT_SAFE = 4,
    // Conversion with precision loss,
    // e.g. int64 to double (53 bits precision)
    TCC_CONVERT_UNSAFE = 5
};
|
|
28
|
+
|
|
29
|
+
typedef std::pair<Type, Type> TypePair;
|
|
30
|
+
|
|
31
|
+
struct TCCRecord {
|
|
32
|
+
TypePair key;
|
|
33
|
+
TypeCompatibleCode val;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
typedef std::vector<TCCRecord> TCCMapBin;
|
|
37
|
+
|
|
38
|
+
class TCCMap {
|
|
39
|
+
public:
|
|
40
|
+
TCCMap();
|
|
41
|
+
|
|
42
|
+
void insert(const TypePair &key, TypeCompatibleCode val);
|
|
43
|
+
TypeCompatibleCode find(const TypePair &key) const;
|
|
44
|
+
private:
|
|
45
|
+
size_t hash(const TypePair &key) const;
|
|
46
|
+
|
|
47
|
+
/* Must be a power of two */
|
|
48
|
+
static const size_t TCCMAP_SIZE = 512;
|
|
49
|
+
TCCMapBin records[TCCMAP_SIZE];
|
|
50
|
+
int nb_records;
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
// Penalty tally for one overload candidate: how many of its arguments
// need each kind of non-exact conversion. Overload selection prefers the
// candidate that compares lowest under operator<.
struct Rating {
    Rating();

    bool operator == (const Rating &other) const;
    bool operator < (const Rating &other) const;

    unsigned int promote;         // arguments matched by promotion
    unsigned int safe_convert;    // arguments matched by safe conversion
    unsigned int unsafe_convert;  // arguments matched by unsafe conversion
};
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TypeManager{
|
|
66
|
+
public:
|
|
67
|
+
bool canPromote(Type from, Type to) const;
|
|
68
|
+
bool canUnsafeConvert(Type from, Type to) const;
|
|
69
|
+
bool canSafeConvert(Type from, Type to) const;
|
|
70
|
+
|
|
71
|
+
void addPromotion(Type from, Type to);
|
|
72
|
+
void addUnsafeConversion(Type from, Type to);
|
|
73
|
+
void addSafeConversion(Type from, Type to);
|
|
74
|
+
void addCompatibility(Type from, Type to, TypeCompatibleCode by);
|
|
75
|
+
|
|
76
|
+
TypeCompatibleCode isCompatible(Type from, Type to) const;
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
Output stored in selected.
|
|
80
|
+
Returns
|
|
81
|
+
Number of matches
|
|
82
|
+
*/
|
|
83
|
+
int selectOverload(const Type sig[], const Type ovsigs[], int &selected,
|
|
84
|
+
int sigsz, int ovct, bool allow_unsafe,
|
|
85
|
+
bool exact_match_required
|
|
86
|
+
) const;
|
|
87
|
+
|
|
88
|
+
private:
|
|
89
|
+
int _selectOverload(const Type sig[], const Type ovsigs[], int &selected,
|
|
90
|
+
int sigsz, int ovct, bool allow_unsafe,
|
|
91
|
+
bool exact_match_required,
|
|
92
|
+
Rating ratings[], int candidates[]) const;
|
|
93
|
+
|
|
94
|
+
TCCMap tccmap;
|
|
95
|
+
};
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
const char* TCCString(TypeCompatibleCode tcc);
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
#endif // NUMBA_TYPECONV_HPP_
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda import types
|
|
5
|
+
from numba.cuda.extending import overload, overload_method
|
|
6
|
+
from numba.cuda.typing import signature
|
|
7
|
+
from numba.cuda import nvvmutils
|
|
8
|
+
from numba.cuda.extending import intrinsic
|
|
9
|
+
from numba.cuda.types.ext_types import grid_group, GridGroup as GridGroupClass
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GridGroup:
    """A cooperative group representing the entire grid.

    This class is a host-side placeholder. The CUDA implementation of
    ``sync`` is registered separately in this module with
    ``overload_method``.
    """

    def sync(self) -> None:
        """Synchronize this grid group.

        The placeholder does nothing and returns ``None``. It takes
        ``self`` so that ``this_grid().sync()`` is a valid call on an
        instance rather than a ``TypeError``.
        """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def this_grid() -> GridGroup:
    """Return a group object standing for the current grid.

    This pure-Python body builds a fresh ``GridGroup`` placeholder. The
    CUDA target uses the overload registered for this function instead.
    """
    group = GridGroup()
    return group
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@intrinsic
def _this_grid(typingctx):
    """Intrinsic producing the grid-group handle for the running kernel.

    Typed as a no-argument call returning ``grid_group``.
    """
    handle_sig = signature(grid_group)

    def codegen(context, builder, sig, args):
        # Mark the code library being built as using cooperative groups.
        context.active_code_library.use_cooperative = True
        # NOTE(review): the constant 1 presumably selects the grid-scope
        # handle; confirm against the CUDA device runtime.
        scope = context.get_constant(types.int32, 1)
        get_handle = nvvmutils.declare_cudaCGGetIntrinsicHandle(
            builder.module
        )
        return builder.call(get_handle, (scope,))

    return handle_sig, codegen
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@overload(this_grid, target="cuda")
def _ol_this_grid():
    """CUDA-target overload of ``this_grid``; defers to ``_this_grid``."""

    def impl():
        handle = _this_grid()
        return handle

    return impl
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@intrinsic
def _grid_group_sync(typingctx, group):
    """Intrinsic that synchronizes on the given grid group.

    Typed as taking ``group`` and returning ``int32``.
    """
    sync_sig = signature(types.int32, group)

    def codegen(context, builder, sig, args):
        # Mark the code library being built as using cooperative groups.
        context.active_code_library.use_cooperative = True
        zero_flags = context.get_constant(types.int32, 0)
        synchronize = nvvmutils.declare_cudaCGSynchronize(builder.module)
        # Pass the lowered group argument(s) followed by the flags word.
        call_args = (*args, zero_flags)
        return builder.call(synchronize, call_args)

    return sync_sig, codegen
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@overload_method(GridGroupClass, "sync", target="cuda")
def _ol_grid_group_sync(group):
    """CUDA-target ``sync`` method for grid-group values.

    The implementation defers to ``_grid_group_sync`` and returns its
    result.
    """

    def impl(group):
        status = _grid_group_sync(group)
        return status

    return impl
|