numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import multiprocessing
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import unittest
|
|
8
|
+
import warnings
|
|
9
|
+
import sys
|
|
10
|
+
import stat
|
|
11
|
+
import subprocess
|
|
12
|
+
|
|
13
|
+
from numba import cuda
|
|
14
|
+
from numba.cuda.core.errors import NumbaWarning
|
|
15
|
+
from numba.cuda.testing import (
|
|
16
|
+
CUDATestCase,
|
|
17
|
+
skip_on_cudasim,
|
|
18
|
+
skip_unless_cc_60,
|
|
19
|
+
skip_if_cudadevrt_missing,
|
|
20
|
+
test_data_dir,
|
|
21
|
+
skip_on_standalone_numba_cuda,
|
|
22
|
+
)
|
|
23
|
+
from numba.cuda.tests.support import (
|
|
24
|
+
TestCase,
|
|
25
|
+
temp_directory,
|
|
26
|
+
import_dynamic,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BaseCacheTest(TestCase):
    """Base fixture for tests that inspect an on-disk cache directory.

    ``setUp`` copies ``usecases_file`` into a fresh temporary directory as
    ``modname + ".py"`` and puts that directory at the front of ``sys.path``
    so the copy can be imported by name. ``cache_dir`` points at the
    ``__pycache__`` folder next to the copy.

    Subclasses must set both ``usecases_file`` and ``modname``.
    """

    # The source file that will be copied
    usecases_file = None
    # Make sure this doesn't conflict with another module
    modname = None

    def setUp(self):
        """Create the temporary module copy and make it importable."""
        self.tempdir = temp_directory("test_cache")
        sys.path.insert(0, self.tempdir)
        self.modfile = os.path.join(self.tempdir, self.modname + ".py")
        self.cache_dir = os.path.join(self.tempdir, "__pycache__")
        shutil.copy(self.usecases_file, self.modfile)
        # Ensure the copy is readable and writable by the owner regardless
        # of the permissions carried over from the original file.
        os.chmod(self.modfile, stat.S_IREAD | stat.S_IWRITE)
        # Show full diffs when an assertion fails.
        self.maxDiff = None

    def tearDown(self):
        """Undo the ``sys.modules`` / ``sys.path`` changes made by the test."""
        sys.modules.pop(self.modname, None)
        sys.path.remove(self.tempdir)

    def import_module(self):
        """Import a fresh version of the test module and return it.

        Any previously imported copy is dropped from ``sys.modules`` and its
        cached bytecode file is deleted first. All jitted functions in the
        test module will start anew and load overloads from the on-disk
        cache if possible.
        """
        old = sys.modules.pop(self.modname, None)
        if old is not None:
            # Make sure cached bytecode is removed. Prefer the spec's
            # ``cached`` path; ``module.__cached__`` is the legacy spelling
            # and either one may be missing or None, in which case there is
            # nothing to delete.
            spec = getattr(old, "__spec__", None)
            cached = getattr(spec, "cached", None)
            if cached is None:
                cached = getattr(old, "__cached__", None)
            if cached is not None:
                try:
                    os.unlink(cached)
                except FileNotFoundError:
                    pass
        mod = import_dynamic(self.modname)
        # Strip a trailing "c"/"o" so a ".pyc"/".pyo" path compares equal to
        # the ".py" source path.
        self.assertEqual(mod.__file__.rstrip("co"), self.modfile)
        return mod

    def cache_contents(self):
        """Return the file names in ``cache_dir``, excluding bytecode files.

        Returns an empty list when the directory does not exist.
        """
        try:
            return [
                fn
                for fn in os.listdir(self.cache_dir)
                if not fn.endswith((".pyc", ".pyo"))
            ]
        except FileNotFoundError:
            return []

    def get_cache_mtimes(self):
        """Return ``{file name: mtime}`` for every entry of ``cache_contents``.

        Keys are inserted in sorted file-name order.
        """
        return {
            fn: os.path.getmtime(os.path.join(self.cache_dir, fn))
            for fn in sorted(self.cache_contents())
        }

    def check_pycache(self, n):
        """Assert that the cache directory holds exactly ``n`` cache files."""
        c = self.cache_contents()
        self.assertEqual(len(c), n, c)

    def dummy_test(self):
        """Do nothing.

        NOTE(review): presumably exists so the class can be instantiated
        with a method name outside of a normal test run -- confirm.
        """
        pass
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class DispatcherCacheUsecasesTest(BaseCacheTest):
    """Cache fixture bound to the ``cache_usecases.py`` file next to this one.

    The use-case file is copied and imported under the module name
    ``dispatcher_caching_test_fodder``.
    """

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "dispatcher_caching_test_fodder"

    def run_in_separate_process(self, *, envvars=None):
        """Import the test module in a new interpreter and run ``self_test()``.

        Parameters
        ----------
        envvars : mapping, optional
            Extra environment variables layered on top of a copy of the
            current environment for the child process. ``None`` (the
            default) adds nothing.

        Raises
        ------
        AssertionError
            If the child process exits with a non-zero return code; the
            message carries the captured stdout and stderr.
        """
        # Cached functions can be run from a distinct process.
        # Also stresses issue #1603: uncached function calling cached function
        # shouldn't fail compiling.
        # The leading "if 1:" lets the snippet body stay indented.
        code = """if 1:
            import sys

            sys.path.insert(0, %(tempdir)r)
            mod = __import__(%(modname)r)
            mod.self_test()
            """ % dict(tempdir=self.tempdir, modname=self.modname)

        subp_env = os.environ.copy()
        # ``None`` replaces the former mutable ``{}`` default.
        if envvars is not None:
            subp_env.update(envvars)
        popen = subprocess.Popen(
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=subp_env,
        )
        out, err = popen.communicate()
        if popen.returncode != 0:
            raise AssertionError(
                "process failed with code %s: \n"
                "stdout follows\n%s\n"
                "stderr follows\n%s\n"
                % (popen.returncode, out.decode(), err.decode()),
            )

    def check_hits(self, func, hits, misses=None):
        """Assert the total cache hit (and optionally miss) counts of ``func``.

        ``func.stats.cache_hits`` and ``func.stats.cache_misses`` are read as
        mappings whose values are summed. ``misses`` is only checked when it
        is not ``None``.
        """
        st = func.stats
        self.assertEqual(sum(st.cache_hits.values()), hits, st.cache_hits)
        if misses is not None:
            self.assertEqual(
                sum(st.cache_misses.values()), misses, st.cache_misses
            )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def check_access_is_preventable():
    """Report whether ``chmod 500`` actually blocks writes for this user.

    A scratch directory is created, proven writable, then restricted to
    mode ``0o500`` before a second write is attempted.  A user with
    elevated rights (e.g. root) is likely to be able to write anyway, so
    tests that need working access prevention are skipped based on this
    result.

    Returns
    -------
    bool
        ``True`` when the second write raises ``PermissionError``,
        ``False`` when it succeeds.
    """
    probe_dir = os.path.join(temp_directory("test_cache"), "writable_test")
    os.mkdir(probe_dir)

    # Sanity check: writing must work before permissions are tightened.
    with open(os.path.join(probe_dir, "write_ok"), "wt") as fh:
        fh.write("check1")

    # Now forbid access and see whether the restriction is honoured.
    os.chmod(probe_dir, 0o500)
    try:
        with open(os.path.join(probe_dir, "write_forbidden"), "wt") as fh:
            fh.write("check2")
    except PermissionError:
        # The write was refused for access/permission reasons, as per
        # https://github.com/conda/conda/blob/4.5.0/conda/gateways/disk/permissions.py#L35-L37 # noqa: E501
        # so access prevention via `chmod 500` works for this user.
        return True
    else:
        # The write went through: access prevention is not possible.
        return False
    finally:
        # Restore write permission so the scratch directory can be removed.
        os.chmod(probe_dir, 0o775)
        shutil.rmtree(probe_dir)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# Reason reported for tests skipped through ``skip_bad_access``.
_access_msg = "Cannot create a directory to which writes are preventable"
# Evaluated once, at import time.
_access_preventable = check_access_is_preventable()
# Decorator: run the test only when write access can really be prevented.
skip_bad_access = unittest.skipUnless(_access_preventable, _access_msg)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@skip_on_cudasim("Simulator does not implement caching")
class CUDACachingTest(DispatcherCacheUsecasesTest):
    """Caching tests driven by the functions in ``cache_usecases.py``."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_caching_test_fodder"

    def setUp(self):
        # Cache fixture first, CUDA fixture second; tearDown mirrors this.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_caching(self):
        # Importing the module alone must not create any cache entry.
        self.check_pycache(0)
        mod = self.import_module()
        self.check_pycache(0)

        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)
        self.check_pycache(2)  # 1 index, 1 data
        self.assertPreciseEqual(add(2.5, 3), 6.5)
        self.check_pycache(3)  # 1 index, 2 data
        self.check_hits(add.func, 0, 2)

        aligned = mod.record_return_aligned
        record = aligned(mod.aligned_arr, 1)
        self.assertPreciseEqual(tuple(record), (2, 43.5))

        packed = mod.record_return_packed
        record = packed(mod.packed_arr, 1)
        self.assertPreciseEqual(tuple(record), (2, 43.5))
        self.check_pycache(6)  # 2 index, 4 data
        self.check_hits(packed.func, 0, 2)

        # Check the code runs ok from another process
        self.run_in_separate_process()

    def test_no_caching(self):
        # The no-cache use case must leave the cache directory empty.
        mod = self.import_module()

        uncached_add = mod.add_nocache_usecase
        self.assertPreciseEqual(uncached_add(2, 3), 6)
        self.check_pycache(0)

    def test_many_locals(self):
        # Declaring many local arrays creates a very large LLVM IR, which
        # cannot be pickled due to the level of recursion it requires to
        # pickle. This test ensures that kernels with many locals (and
        # therefore large IR) can be cached. See Issue #8373:
        # https://github.com/numba/numba/issues/8373
        self.check_pycache(0)
        mod = self.import_module()
        kernel = mod.many_locals
        kernel[1, 1]()
        self.check_pycache(2)  # 1 index, 1 data

    def test_closure(self):
        mod = self.import_module()

        # Each closure adds a different captured constant to its argument.
        expectations = (
            ("closure1", 6),  # 3 + 3 = 6
            ("closure2", 8),  # 3 + 5 = 8
            ("closure3", 10),  # 3 + 7 = 10
            ("closure4", 12),  # 3 + 9 = 12
        )

        with warnings.catch_warnings():
            # Any NumbaWarning raised while running the closures is an error.
            warnings.simplefilter("error", NumbaWarning)

            for attr, expected in expectations:
                closure = getattr(mod, attr)
                self.assertPreciseEqual(closure(3), expected)
            self.check_pycache(5)  # 1 nbi, 4 nbc

    def test_cache_reuse(self):
        mod = self.import_module()
        mod.add_usecase(2, 3)
        mod.add_usecase(2.5, 3.5)
        mod.outer_uncached(2, 3)
        mod.outer(2, 3)
        mod.record_return_packed(mod.packed_arr, 0)
        mod.record_return_aligned(mod.aligned_arr, 1)
        mod.simple_usecase_caller(2)
        baseline_mtimes = self.get_cache_mtimes()
        # Two signatures compiled
        self.check_hits(mod.add_usecase.func, 0, 2)

        # A second import must be served entirely from the cache.
        mod2 = self.import_module()
        self.assertIsNot(mod, mod2)
        add = mod2.add_usecase
        add(2, 3)
        self.check_hits(add.func, 1, 0)
        add(2.5, 3.5)
        self.check_hits(add.func, 2, 0)

        # The files haven't changed
        self.assertEqual(self.get_cache_mtimes(), baseline_mtimes)

        self.run_in_separate_process()
        self.assertEqual(self.get_cache_mtimes(), baseline_mtimes)

    def test_cache_invalidate(self):
        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)

        # This should change the functions' results
        with open(self.modfile, "a") as source_file:
            source_file.write("\nZ = 10\n")

        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 15)

    def test_recompile(self):
        # Explicit call to recompile() should overwrite the cache
        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)

        mod = self.import_module()
        add = mod.add_usecase
        mod.Z = 10
        # Changing the global alone is not picked up ...
        self.assertPreciseEqual(add(2, 3), 6)
        # ... until the function is explicitly recompiled.
        add.func.recompile()
        self.assertPreciseEqual(add(2, 3), 15)

        # Freshly recompiled version is re-used from other imports
        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 15)

    def test_same_names(self):
        # Function with the same names should still disambiguate
        mod = self.import_module()
        first = mod.renamed_function1
        self.assertPreciseEqual(first(2), 4)
        second = mod.renamed_function2
        self.assertPreciseEqual(second(2), 8)

    def _test_pycache_fallback(self):
        """
        Check that caching still works through a fallback location
        (e.g. the user-wide cache dir) when __pycache__ is disabled.
        """
        mod = self.import_module()
        add = mod.add_usecase
        # Remove this function's cache files at the end, to avoid accumulation
        # across test calls.
        self.addCleanup(
            shutil.rmtree, add.func.stats.cache_path, ignore_errors=True
        )

        self.assertPreciseEqual(add(2, 3), 6)
        # It's a cache miss since the file was copied to a new temp location
        self.check_hits(add.func, 0, 1)

        # Test re-use
        mod2 = self.import_module()
        add = mod2.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)
        self.check_hits(add.func, 1, 0)

        # The __pycache__ is empty (otherwise the test's preconditions
        # wouldn't be met)
        self.check_pycache(0)

    @skip_bad_access
    @unittest.skipIf(
        os.name == "nt", "cannot easily make a directory read-only on Windows"
    )
    def test_non_creatable_pycache(self):
        # Make it impossible to create the __pycache__ directory
        original_mode = os.stat(self.tempdir).st_mode
        os.chmod(self.tempdir, 0o500)
        self.addCleanup(os.chmod, self.tempdir, original_mode)

        self._test_pycache_fallback()

    @skip_bad_access
    @unittest.skipIf(
        os.name == "nt", "cannot easily make a directory read-only on Windows"
    )
    def test_non_writable_pycache(self):
        # Make it impossible to write to the __pycache__ directory
        pycache_dir = os.path.join(self.tempdir, "__pycache__")
        os.mkdir(pycache_dir)
        original_mode = os.stat(pycache_dir).st_mode
        os.chmod(pycache_dir, 0o500)
        self.addCleanup(os.chmod, pycache_dir, original_mode)

        self._test_pycache_fallback()

    def test_cannot_cache_linking_libraries(self):
        # Requesting cache=True together with link files must fail when the
        # function is declared.
        link = str(test_data_dir / "jitlink.ptx")
        expected_error = "Cannot pickle CUDACodeLibrary with linking files"
        with self.assertRaisesRegex(RuntimeError, expected_error):

            @cuda.jit("void()", cache=True, link=[link])
            def f():
                pass
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
@skip_on_cudasim("Simulator does not implement caching")
class CUDACooperativeGroupTest(DispatcherCacheUsecasesTest):
    """Caching test for a function that uses cooperative groups.

    See Issue #9432: https://github.com/numba/numba/issues/9432
    If a cached function using CG sync was the first thing to compile,
    the compile would fail.
    """

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cg_cache_usecases.py")
    modname = "cuda_cooperative_caching_test_fodder"

    def setUp(self):
        # Cache fixture first, CUDA fixture second; tearDown mirrors this.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    @skip_unless_cc_60
    @skip_if_cudadevrt_missing
    def test_cache_cg(self):
        # Functions using cooperative groups should be cacheable. See Issue
        # #8888: https://github.com/numba/numba/issues/8888
        self.check_pycache(0)
        usecases = self.import_module()
        # Importing alone must not write anything to the cache.
        self.check_pycache(0)

        usecases.cg_usecase(0)
        self.check_pycache(2)  # 1 index, 1 data

        # Check the code runs ok from another process
        self.run_in_separate_process()
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
@skip_on_cudasim("Simulator does not implement caching")
class CUDAAndCPUCachingTest(DispatcherCacheUsecasesTest):
    """Caching tests for one function jitted for both CPU and CUDA."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_with_cpu_usecases.py")
    modname = "cuda_and_cpu_caching_test_fodder"

    def setUp(self):
        # Cache fixture first, CUDA fixture second; tearDown mirrors this.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    @skip_on_standalone_numba_cuda
    def test_cpu_and_cuda_targets(self):
        # The same function jitted for CPU and CUDA targets should maintain
        # separate caches for each target.
        self.check_pycache(0)
        mod = self.import_module()
        self.check_pycache(0)

        cpu_fn = mod.assign_cpu
        cuda_fn = mod.assign_cuda

        # Integer signature on each target.
        self.assertPreciseEqual(cpu_fn(5), 5)
        self.check_pycache(2)  # 1 index, 1 data
        self.assertPreciseEqual(cuda_fn(5), 5)
        self.check_pycache(3)  # 1 index, 2 data

        self.check_hits(cpu_fn.func, 0, 1)
        self.check_hits(cuda_fn.func, 0, 1)

        # Float signature on each target.
        self.assertPreciseEqual(cpu_fn(5.5), 5.5)
        self.check_pycache(4)  # 1 index, 3 data
        self.assertPreciseEqual(cuda_fn(5.5), 5.5)
        self.check_pycache(5)  # 1 index, 4 data

        self.check_hits(cpu_fn.func, 0, 2)
        self.check_hits(cuda_fn.func, 0, 2)

    @skip_on_standalone_numba_cuda
    def test_cpu_and_cuda_reuse(self):
        # Existing cache files for the CPU and CUDA targets are reused.
        mod = self.import_module()
        mod.assign_cpu(5)
        mod.assign_cpu(5.5)
        mod.assign_cuda(5)
        mod.assign_cuda(5.5)

        baseline_mtimes = self.get_cache_mtimes()

        # Two signatures compiled
        self.check_hits(mod.assign_cpu.func, 0, 2)
        self.check_hits(mod.assign_cuda.func, 0, 2)

        mod2 = self.import_module()
        self.assertIsNot(mod, mod2)
        cpu_fn = mod2.assign_cpu
        cuda_fn = mod2.assign_cuda

        # Every call on the re-imported module must be a cache hit.
        cpu_fn(2)
        self.check_hits(cpu_fn.func, 1, 0)
        cpu_fn(2.5)
        self.check_hits(cpu_fn.func, 2, 0)
        cuda_fn(2)
        self.check_hits(cuda_fn.func, 1, 0)
        cuda_fn(2.5)
        self.check_hits(cuda_fn.func, 2, 0)

        # The files haven't changed
        self.assertEqual(self.get_cache_mtimes(), baseline_mtimes)

        self.run_in_separate_process()
        self.assertEqual(self.get_cache_mtimes(), baseline_mtimes)
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def get_different_cc_gpus():
    """Find two GPUs whose compute capabilities differ.

    The first entry of ``cuda.gpus`` is the reference; the remaining
    entries are scanned in order for one with a different compute
    capability.

    Returns
    -------
    tuple or None
        ``(first_gpu, other_gpu)`` for the first differing GPU found, or
        ``None`` when every GPU reports the reference compute capability.
    """
    reference_gpu = cuda.gpus[0]
    with reference_gpu:
        reference_cc = cuda.current_context().device.compute_capability

    for candidate in cuda.gpus[1:]:
        with candidate:
            candidate_cc = cuda.current_context().device.compute_capability
            if candidate_cc != reference_cc:
                return (reference_gpu, candidate)

    return None
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
@skip_on_cudasim("Simulator does not implement caching")
class TestMultiCCCaching(DispatcherCacheUsecasesTest):
    """Caching test run across two GPUs of different compute capability."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_multi_cc_caching_test_fodder"

    def setUp(self):
        # Cache fixture first, CUDA fixture second; tearDown mirrors this.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_cache(self):
        gpu_pair = get_different_cc_gpus()
        if not gpu_pair:
            self.skipTest("Need two different CCs for multi-CC cache test")

        self.check_pycache(0)
        mod = self.import_module()
        self.check_pycache(0)

        # Step 1. Populate the cache with the first GPU
        with gpu_pair[0]:
            fn = mod.add_usecase
            self.assertPreciseEqual(fn(2, 3), 6)
            self.check_pycache(2)  # 1 index, 1 data
            self.assertPreciseEqual(fn(2.5, 3), 6.5)
            self.check_pycache(3)  # 1 index, 2 data
            self.check_hits(fn.func, 0, 2)

            fn = mod.record_return_aligned
            record = fn(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            fn = mod.record_return_packed
            record = fn(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
            self.check_pycache(6)  # 2 index, 4 data
            self.check_hits(fn.func, 0, 2)

        # Step 2. Run with the second GPU - under present behaviour this
        # doesn't further populate the cache.
        with gpu_pair[1]:
            fn = mod.add_usecase
            self.assertPreciseEqual(fn(2, 3), 6)
            self.check_pycache(6)  # cache unchanged
            self.assertPreciseEqual(fn(2.5, 3), 6.5)
            self.check_pycache(6)  # cache unchanged
            self.check_hits(fn.func, 0, 2)

            fn = mod.record_return_aligned
            record = fn(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            fn = mod.record_return_packed
            record = fn(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
            self.check_pycache(6)  # cache unchanged
            self.check_hits(fn.func, 0, 2)

        # Step 3. Run in a separate module with the second GPU - this populates
        # the cache for the second CC.
        mod2 = self.import_module()
        self.assertIsNot(mod, mod2)

        with gpu_pair[1]:
            fn = mod2.add_usecase
            self.assertPreciseEqual(fn(2, 3), 6)
            self.check_pycache(7)  # 2 index, 5 data
            self.assertPreciseEqual(fn(2.5, 3), 6.5)
            self.check_pycache(8)  # 2 index, 6 data
            self.check_hits(fn.func, 0, 2)

            # The input arrays are taken from the first module object.
            fn = mod2.record_return_aligned
            record = fn(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            fn = mod2.record_return_packed
            record = fn(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
            self.check_pycache(10)  # 2 index, 8 data
            self.check_hits(fn.func, 0, 2)

        # The following steps check that we can use the NVVM IR loaded from the
        # cache to generate PTX for a different compute capability to the
        # cached cubin's CC. To check this, we create another module that loads
        # the cached version containing a cubin for GPU 1. There will be no
        # cubin for GPU 0, so when we try to use it the PTX must be generated.

        mod3 = self.import_module()
        self.assertIsNot(mod, mod3)

        # Step 4. Run with GPU 1 and get a cache hit, loading the cache created
        # during Step 3.
        with gpu_pair[1]:
            fn = mod3.add_usecase
            self.assertPreciseEqual(fn(2, 3), 6)
            self.assertPreciseEqual(fn(2.5, 3), 6.5)

            fn = mod3.record_return_aligned
            record = fn(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            fn = mod3.record_return_packed
            record = fn(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

        # Step 5. Run with GPU 0 using the module from Step 4, to force PTX
        # generation from cached NVVM IR.
        with gpu_pair[0]:
            fn = mod3.add_usecase
            self.assertPreciseEqual(fn(2, 3), 6)
            self.assertPreciseEqual(fn(2.5, 3), 6.5)

            fn = mod3.record_return_aligned
            record = fn(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            fn = mod3.record_return_packed
            record = fn(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def child_initializer():
    """Turn off occupancy and implicit-copy warnings in this process.

    Meant to run in the worker processes of a multiprocessing pool; it
    sets ``CUDA_LOW_OCCUPANCY_WARNINGS`` and ``CUDA_WARN_ON_IMPLICIT_COPY``
    on ``numba.cuda.core.config`` to ``0``.
    """
    from numba.cuda.core import config

    for flag in ("CUDA_LOW_OCCUPANCY_WARNINGS", "CUDA_WARN_ON_IMPLICIT_COPY"):
        setattr(config, flag, 0)
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
@skip_on_cudasim("Simulator does not implement caching")
class TestMultiprocessCache(DispatcherCacheUsecasesTest):
    """Caching test that calls a cached function from several processes."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_mp_caching_test_fodder"

    def setUp(self):
        # Cache fixture first, CUDA fixture second; tearDown mirrors this.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_multiprocessing(self):
        # Check caching works from multiple processes at once (#2028)
        mod = self.import_module()
        # Calling a pure Python caller of the JIT-compiled function is
        # necessary to reproduce the issue.
        f = mod.simple_usecase_caller
        n = 3
        try:
            ctx = multiprocessing.get_context("spawn")
        except AttributeError:
            # Fall back to the module-level API when contexts are missing.
            ctx = multiprocessing

        pool = ctx.Pool(n, child_initializer)

        try:
            res = sum(pool.imap(f, range(n)))
        finally:
            # close() only stops new work being submitted; join() then waits
            # for the workers to exit so none of them outlive this test.
            pool.close()
            pool.join()
        self.assertEqual(res, n * (n - 1) // 2)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
@skip_on_cudasim("Simulator does not implement the CUDACodeLibrary")
class TestCUDACodeLibrary(CUDATestCase):
    """Tests of miscellaneous CUDACodeLibrary behaviour that we wish to
    check explicitly."""

    def test_cannot_serialize_unfinalized(self):
        # The CUDA codegen fails to import under the simulator, so it cannot
        # be imported at the top level of this module.
        from numba.cuda.codegen import CUDACodeLibrary

        # A CodeLibrary normally needs a real CodeGen, but the codegen is
        # never used here, so a bare object stands in for it.
        library = CUDACodeLibrary(object(), "library")
        with self.assertRaisesRegex(RuntimeError, "Cannot pickle unfinalized"):
            library._reduce_states()
|