numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,12 @@ import sys
|
|
3
3
|
import subprocess
|
4
4
|
import threading
|
5
5
|
from numba import cuda
|
6
|
-
from numba.cuda.testing import (
|
7
|
-
|
6
|
+
from numba.cuda.testing import (
|
7
|
+
unittest,
|
8
|
+
CUDATestCase,
|
9
|
+
skip_on_cudasim,
|
10
|
+
skip_under_cuda_memcheck,
|
11
|
+
)
|
8
12
|
from numba.tests.support import captured_stdout
|
9
13
|
|
10
14
|
|
@@ -14,21 +18,19 @@ class TestCudaDetect(CUDATestCase):
|
|
14
18
|
with captured_stdout() as out:
|
15
19
|
cuda.detect()
|
16
20
|
output = out.getvalue()
|
17
|
-
self.assertIn(
|
18
|
-
self.assertIn(
|
21
|
+
self.assertIn("Found", output)
|
22
|
+
self.assertIn("CUDA devices", output)
|
19
23
|
|
20
24
|
|
21
|
-
@skip_under_cuda_memcheck(
|
25
|
+
@skip_under_cuda_memcheck("Hangs cuda-memcheck")
|
22
26
|
class TestCUDAFindLibs(CUDATestCase):
|
23
|
-
|
24
27
|
def run_cmd(self, cmdline, env):
|
25
|
-
popen = subprocess.Popen(
|
26
|
-
|
27
|
-
|
28
|
-
env=env)
|
28
|
+
popen = subprocess.Popen(
|
29
|
+
cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
|
30
|
+
)
|
29
31
|
|
30
32
|
# finish in 5 minutes or kill it
|
31
|
-
timeout = threading.Timer(5 * 60
|
33
|
+
timeout = threading.Timer(5 * 60.0, popen.kill)
|
32
34
|
try:
|
33
35
|
timeout.start()
|
34
36
|
out, err = popen.communicate()
|
@@ -51,8 +53,8 @@ class TestCUDAFindLibs(CUDATestCase):
|
|
51
53
|
cmdline = [sys.executable, "-c", code]
|
52
54
|
return self.run_cmd(cmdline, env_copy)
|
53
55
|
|
54
|
-
@skip_on_cudasim(
|
55
|
-
@unittest.skipIf(not sys.platform.startswith(
|
56
|
+
@skip_on_cudasim("Simulator does not hit device library search code path")
|
57
|
+
@unittest.skipIf(not sys.platform.startswith("linux"), "linux only")
|
56
58
|
def test_cuda_find_lib_errors(self):
|
57
59
|
"""
|
58
60
|
This tests that the find_libs works as expected in the case of an
|
@@ -60,7 +62,7 @@ class TestCUDAFindLibs(CUDATestCase):
|
|
60
62
|
"""
|
61
63
|
# one of these is likely to exist on linux, it's also unlikely that
|
62
64
|
# someone has extracted the contents of libdevice into here!
|
63
|
-
locs = [
|
65
|
+
locs = ["lib", "lib64"]
|
64
66
|
|
65
67
|
looking_for = None
|
66
68
|
for l in locs:
|
@@ -71,11 +73,12 @@ class TestCUDAFindLibs(CUDATestCase):
|
|
71
73
|
# This is the testing part, the test will only run if there's a valid
|
72
74
|
# path in which to look
|
73
75
|
if looking_for is not None:
|
74
|
-
out, err = self.run_test_in_separate_process(
|
75
|
-
|
76
|
+
out, err = self.run_test_in_separate_process(
|
77
|
+
"NUMBA_CUDA_DRIVER", looking_for
|
78
|
+
)
|
76
79
|
self.assertTrue(out is not None)
|
77
80
|
self.assertTrue(err is not None)
|
78
81
|
|
79
82
|
|
80
|
-
if __name__ ==
|
83
|
+
if __name__ == "__main__":
|
81
84
|
unittest.main()
|
@@ -8,6 +8,7 @@ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
8
8
|
from numba.tests.support import linux_only
|
9
9
|
|
10
10
|
if not config.ENABLE_CUDASIM:
|
11
|
+
|
11
12
|
class DeviceOnlyEMMPlugin(cuda.HostOnlyCUDAMemoryManager):
|
12
13
|
"""
|
13
14
|
Dummy EMM Plugin implementation for testing. It memorises which plugin
|
@@ -56,8 +57,9 @@ if not config.ENABLE_CUDASIM:
|
|
56
57
|
# the reference count drops to zero.
|
57
58
|
ctx = weakref.proxy(self.context)
|
58
59
|
ptr = ctypes.c_void_p(alloc_count)
|
59
|
-
return cuda.cudadrv.driver.AutoFreePointer(
|
60
|
-
|
60
|
+
return cuda.cudadrv.driver.AutoFreePointer(
|
61
|
+
ctx, ptr, size, finalizer=finalizer
|
62
|
+
)
|
61
63
|
|
62
64
|
def initialize(self):
|
63
65
|
# No special initialization needed.
|
@@ -97,7 +99,7 @@ if not config.ENABLE_CUDASIM:
|
|
97
99
|
return 2
|
98
100
|
|
99
101
|
|
100
|
-
@skip_on_cudasim(
|
102
|
+
@skip_on_cudasim("EMM Plugins not supported on CUDA simulator")
|
101
103
|
class TestDeviceOnlyEMMPlugin(CUDATestCase):
|
102
104
|
"""
|
103
105
|
Tests that the API of an EMM Plugin that implements device allocations
|
@@ -175,7 +177,7 @@ class TestDeviceOnlyEMMPlugin(CUDATestCase):
|
|
175
177
|
self.assertIn("Dummy IPC handle for alloc 1", ipch._ipc_handle)
|
176
178
|
|
177
179
|
|
178
|
-
@skip_on_cudasim(
|
180
|
+
@skip_on_cudasim("EMM Plugins not supported on CUDA simulator")
|
179
181
|
class TestBadEMMPluginVersion(CUDATestCase):
|
180
182
|
"""
|
181
183
|
Ensure that Numba rejects EMM Plugins with incompatible version
|
@@ -185,8 +187,8 @@ class TestBadEMMPluginVersion(CUDATestCase):
|
|
185
187
|
def test_bad_plugin_version(self):
|
186
188
|
with self.assertRaises(RuntimeError) as raises:
|
187
189
|
cuda.set_memory_manager(BadVersionEMMPlugin)
|
188
|
-
self.assertIn(
|
190
|
+
self.assertIn("version 1 required", str(raises.exception))
|
189
191
|
|
190
192
|
|
191
|
-
if __name__ ==
|
193
|
+
if __name__ == "__main__":
|
192
194
|
unittest.main()
|
@@ -10,10 +10,9 @@ class TestHostAlloc(ContextResettingTestCase):
|
|
10
10
|
mem = cuda.current_context().memhostalloc(n, mapped=True)
|
11
11
|
|
12
12
|
dtype = np.dtype(np.uint8)
|
13
|
-
ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype,
|
14
|
-
buffer=mem)
|
13
|
+
ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype, buffer=mem)
|
15
14
|
|
16
|
-
magic =
|
15
|
+
magic = 0xAB
|
17
16
|
driver.device_memset(mem, magic, n)
|
18
17
|
|
19
18
|
self.assertTrue(np.all(ary == magic))
|
@@ -46,8 +45,10 @@ class TestHostAlloc(ContextResettingTestCase):
|
|
46
45
|
self.assertTrue(sum(ary != 0) == 0)
|
47
46
|
|
48
47
|
def test_host_operators(self):
|
49
|
-
for ary in [
|
50
|
-
|
48
|
+
for ary in [
|
49
|
+
cuda.mapped_array(10, dtype=np.uint32),
|
50
|
+
cuda.pinned_array(10, dtype=np.uint32),
|
51
|
+
]:
|
51
52
|
ary[:] = range(10)
|
52
53
|
self.assertTrue(sum(ary + 1) == 55)
|
53
54
|
self.assertTrue(sum((ary + 1) * 2 - 1) == 100)
|
@@ -55,11 +56,11 @@ class TestHostAlloc(ContextResettingTestCase):
|
|
55
56
|
self.assertTrue(sum(ary <= 5) == 6)
|
56
57
|
self.assertTrue(sum(ary > 6) == 3)
|
57
58
|
self.assertTrue(sum(ary >= 6) == 4)
|
58
|
-
self.assertTrue(sum(ary
|
59
|
+
self.assertTrue(sum(ary**2) == 285)
|
59
60
|
self.assertTrue(sum(ary // 2) == 20)
|
60
61
|
self.assertTrue(sum(ary / 2.0) == 22.5)
|
61
62
|
self.assertTrue(sum(ary % 2) == 5)
|
62
63
|
|
63
64
|
|
64
|
-
if __name__ ==
|
65
|
+
if __name__ == "__main__":
|
65
66
|
unittest.main()
|
@@ -9,7 +9,7 @@ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
|
9
9
|
|
10
10
|
# A mock of cuInit that always raises a CudaAPIError
|
11
11
|
def cuInit_raising(arg):
|
12
|
-
raise CudaAPIError(999,
|
12
|
+
raise CudaAPIError(999, "CUDA_ERROR_UNKNOWN")
|
13
13
|
|
14
14
|
|
15
15
|
# Test code to run in a child that patches driver.cuInit to a variant that
|
@@ -82,45 +82,45 @@ def cuda_disabled_error_test(result_queue):
|
|
82
82
|
result_queue.put((success, msg))
|
83
83
|
|
84
84
|
|
85
|
-
@skip_on_cudasim(
|
85
|
+
@skip_on_cudasim("CUDA Simulator does not initialize driver")
|
86
86
|
class TestInit(CUDATestCase):
|
87
87
|
def _test_init_failure(self, target, expected):
|
88
88
|
# Run the initialization failure test in a separate subprocess
|
89
|
-
ctx = mp.get_context(
|
89
|
+
ctx = mp.get_context("spawn")
|
90
90
|
result_queue = ctx.Queue()
|
91
91
|
proc = ctx.Process(target=target, args=(result_queue,))
|
92
92
|
proc.start()
|
93
|
-
proc.join(30)
|
93
|
+
proc.join(30) # should complete within 30s
|
94
94
|
success, msg = result_queue.get()
|
95
95
|
|
96
96
|
# Ensure the child process raised an exception during initialization
|
97
97
|
# before checking the message
|
98
98
|
if not success:
|
99
|
-
self.fail(
|
99
|
+
self.fail("CudaSupportError not raised")
|
100
100
|
|
101
101
|
self.assertIn(expected, msg)
|
102
102
|
|
103
103
|
def test_init_failure_raising(self):
|
104
|
-
expected =
|
104
|
+
expected = "Error at driver init: CUDA_ERROR_UNKNOWN (999)"
|
105
105
|
self._test_init_failure(cuInit_raising_test, expected)
|
106
106
|
|
107
107
|
def test_init_failure_error(self):
|
108
|
-
expected =
|
108
|
+
expected = "CUDA_ERROR_UNKNOWN (999)"
|
109
109
|
self._test_init_failure(initialization_error_test, expected)
|
110
110
|
|
111
111
|
def _test_cuda_disabled(self, target):
|
112
112
|
# Uses _test_init_failure to launch the test in a separate subprocess
|
113
113
|
# with CUDA disabled.
|
114
|
-
cuda_disabled = os.environ.get(
|
115
|
-
os.environ[
|
114
|
+
cuda_disabled = os.environ.get("NUMBA_DISABLE_CUDA")
|
115
|
+
os.environ["NUMBA_DISABLE_CUDA"] = "1"
|
116
116
|
try:
|
117
|
-
expected =
|
117
|
+
expected = "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1"
|
118
118
|
self._test_init_failure(cuda_disabled_test, expected)
|
119
119
|
finally:
|
120
120
|
if cuda_disabled is not None:
|
121
|
-
os.environ[
|
121
|
+
os.environ["NUMBA_DISABLE_CUDA"] = cuda_disabled
|
122
122
|
else:
|
123
|
-
os.environ.pop(
|
123
|
+
os.environ.pop("NUMBA_DISABLE_CUDA")
|
124
124
|
|
125
125
|
def test_cuda_disabled_raising(self):
|
126
126
|
self._test_cuda_disabled(cuda_disabled_test)
|
@@ -135,5 +135,5 @@ class TestInit(CUDATestCase):
|
|
135
135
|
self.assertIsNone(cuda.cuda_error())
|
136
136
|
|
137
137
|
|
138
|
-
if __name__ ==
|
138
|
+
if __name__ == "__main__":
|
139
139
|
unittest.main()
|
@@ -5,20 +5,23 @@ from numba.cuda.testing import unittest, ContextResettingTestCase
|
|
5
5
|
from numba.cuda.testing import skip_on_cudasim
|
6
6
|
|
7
7
|
|
8
|
-
@skip_on_cudasim(
|
8
|
+
@skip_on_cudasim("Inline PTX cannot be used in the simulator")
|
9
9
|
class TestCudaInlineAsm(ContextResettingTestCase):
|
10
10
|
def test_inline_rsqrt(self):
|
11
11
|
mod = ir.Module(__name__)
|
12
|
-
mod.triple =
|
12
|
+
mod.triple = "nvptx64-nvidia-cuda"
|
13
13
|
nvvm.add_ir_version(mod)
|
14
14
|
fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.FloatType())])
|
15
|
-
fn = ir.Function(mod, fnty,
|
16
|
-
bldr = ir.IRBuilder(fn.append_basic_block(
|
15
|
+
fn = ir.Function(mod, fnty, "cu_rsqrt")
|
16
|
+
bldr = ir.IRBuilder(fn.append_basic_block("entry"))
|
17
17
|
|
18
18
|
rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
|
19
|
-
inlineasm = ir.InlineAsm(
|
20
|
-
|
21
|
-
|
19
|
+
inlineasm = ir.InlineAsm(
|
20
|
+
rsqrt_approx_fnty,
|
21
|
+
"rsqrt.approx.f32 $0, $1;",
|
22
|
+
"=f,f",
|
23
|
+
side_effect=True,
|
24
|
+
)
|
22
25
|
val = bldr.load(fn.args[0])
|
23
26
|
res = bldr.call(inlineasm, [val])
|
24
27
|
|
@@ -30,8 +33,8 @@ class TestCudaInlineAsm(ContextResettingTestCase):
|
|
30
33
|
nvvm.set_cuda_kernel(fn)
|
31
34
|
nvvmir = str(mod)
|
32
35
|
ptx = nvvm.compile_ir(nvvmir)
|
33
|
-
self.assertTrue(
|
36
|
+
self.assertTrue("rsqrt.approx.f32" in str(ptx))
|
34
37
|
|
35
38
|
|
36
|
-
if __name__ ==
|
39
|
+
if __name__ == "__main__":
|
37
40
|
unittest.main()
|
@@ -1,10 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import warnings
|
3
3
|
from numba.cuda.testing import unittest
|
4
|
-
from numba.cuda.testing import
|
4
|
+
from numba.cuda.testing import skip_on_cudasim, skip_if_cuda_includes_missing
|
5
5
|
from numba.cuda.testing import CUDATestCase, test_data_dir
|
6
|
-
from numba.cuda.cudadrv.driver import
|
7
|
-
LinkerError)
|
6
|
+
from numba.cuda.cudadrv.driver import CudaAPIError, Linker, LinkerError
|
8
7
|
from numba.cuda.cudadrv.error import NvrtcError
|
9
8
|
from numba.cuda import require_context
|
10
9
|
from numba.tests.support import ignore_internal_warnings
|
@@ -103,25 +102,24 @@ def simple_lmem(A, B, dty):
|
|
103
102
|
B[i] = C[i]
|
104
103
|
|
105
104
|
|
106
|
-
@skip_on_cudasim(
|
105
|
+
@skip_on_cudasim("Linking unsupported in the simulator")
|
107
106
|
class TestLinker(CUDATestCase):
|
108
|
-
_NUMBA_NVIDIA_BINDING_0_ENV = {
|
107
|
+
_NUMBA_NVIDIA_BINDING_0_ENV = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "0"}
|
109
108
|
|
110
109
|
@require_context
|
111
110
|
def test_linker_basic(self):
|
112
|
-
|
113
|
-
'''
|
111
|
+
"""Simply go through the constructor and destructor"""
|
114
112
|
linker = Linker.new(cc=(5, 3))
|
115
113
|
del linker
|
116
114
|
|
117
115
|
def _test_linking(self, eager):
|
118
116
|
global bar # must be a global; other it is recognized as a freevar
|
119
|
-
bar = cuda.declare_device(
|
117
|
+
bar = cuda.declare_device("bar", "int32(int32)")
|
120
118
|
|
121
|
-
link = str(test_data_dir /
|
119
|
+
link = str(test_data_dir / "jitlink.ptx")
|
122
120
|
|
123
121
|
if eager:
|
124
|
-
args = [
|
122
|
+
args = ["void(int32[:], int32[:])"]
|
125
123
|
else:
|
126
124
|
args = []
|
127
125
|
|
@@ -144,9 +142,9 @@ class TestLinker(CUDATestCase):
|
|
144
142
|
self._test_linking(eager=True)
|
145
143
|
|
146
144
|
def test_linking_cu(self):
|
147
|
-
bar = cuda.declare_device(
|
145
|
+
bar = cuda.declare_device("bar", "int32(int32)")
|
148
146
|
|
149
|
-
link = str(test_data_dir /
|
147
|
+
link = str(test_data_dir / "jitlink.cu")
|
150
148
|
|
151
149
|
@cuda.jit(link=[link])
|
152
150
|
def kernel(r, x):
|
@@ -165,36 +163,37 @@ class TestLinker(CUDATestCase):
|
|
165
163
|
np.testing.assert_array_equal(r, expected)
|
166
164
|
|
167
165
|
def test_linking_cu_log_warning(self):
|
168
|
-
bar = cuda.declare_device(
|
166
|
+
bar = cuda.declare_device("bar", "int32(int32)")
|
169
167
|
|
170
|
-
link = str(test_data_dir /
|
168
|
+
link = str(test_data_dir / "warn.cu")
|
171
169
|
|
172
170
|
with warnings.catch_warnings(record=True) as w:
|
173
171
|
ignore_internal_warnings()
|
174
172
|
|
175
|
-
@cuda.jit(
|
173
|
+
@cuda.jit("void(int32)", link=[link])
|
176
174
|
def kernel(x):
|
177
175
|
bar(x)
|
178
176
|
|
179
|
-
self.assertEqual(len(w), 1,
|
177
|
+
self.assertEqual(len(w), 1, "Expected warnings from NVRTC")
|
180
178
|
# Check the warning refers to the log messages
|
181
|
-
self.assertIn(
|
179
|
+
self.assertIn("NVRTC log messages", str(w[0].message))
|
182
180
|
# Check the message pertaining to the unused variable is provided
|
183
|
-
self.assertIn(
|
181
|
+
self.assertIn("declared but never referenced", str(w[0].message))
|
184
182
|
|
185
183
|
def test_linking_cu_error(self):
|
186
|
-
bar = cuda.declare_device(
|
184
|
+
bar = cuda.declare_device("bar", "int32(int32)")
|
187
185
|
|
188
|
-
link = str(test_data_dir /
|
186
|
+
link = str(test_data_dir / "error.cu")
|
189
187
|
|
190
188
|
with self.assertRaises(NvrtcError) as e:
|
191
|
-
|
189
|
+
|
190
|
+
@cuda.jit("void(int32)", link=[link])
|
192
191
|
def kernel(x):
|
193
192
|
bar(x)
|
194
193
|
|
195
194
|
msg = e.exception.args[0]
|
196
195
|
# Check the error message refers to the NVRTC compile
|
197
|
-
self.assertIn(
|
196
|
+
self.assertIn("NVRTC Compilation failure", msg)
|
198
197
|
# Check the expected error in the CUDA source is reported
|
199
198
|
self.assertIn('identifier "SYNTAX" is undefined', msg)
|
200
199
|
# Check the filename is reported correctly
|
@@ -203,33 +202,37 @@ class TestLinker(CUDATestCase):
|
|
203
202
|
def test_linking_unknown_filetype_error(self):
|
204
203
|
expected_err = "Don't know how to link file with extension .cuh"
|
205
204
|
with self.assertRaisesRegex(RuntimeError, expected_err):
|
206
|
-
|
205
|
+
|
206
|
+
@cuda.jit("void()", link=["header.cuh"])
|
207
207
|
def kernel():
|
208
208
|
pass
|
209
209
|
|
210
210
|
def test_linking_file_with_no_extension_error(self):
|
211
211
|
expected_err = "Don't know how to link file with no extension"
|
212
212
|
with self.assertRaisesRegex(RuntimeError, expected_err):
|
213
|
-
|
213
|
+
|
214
|
+
@cuda.jit("void()", link=["data"])
|
214
215
|
def kernel():
|
215
216
|
pass
|
216
217
|
|
217
218
|
@skip_if_cuda_includes_missing
|
218
219
|
def test_linking_cu_cuda_include(self):
|
219
|
-
link = str(test_data_dir /
|
220
|
+
link = str(test_data_dir / "cuda_include.cu")
|
220
221
|
|
221
222
|
# An exception will be raised when linking this kernel due to the
|
222
223
|
# compile failure if CUDA includes cannot be found by Nvrtc.
|
223
|
-
@cuda.jit(
|
224
|
+
@cuda.jit("void()", link=[link])
|
224
225
|
def kernel():
|
225
226
|
pass
|
226
227
|
|
227
228
|
def test_try_to_link_nonexistent(self):
|
228
229
|
with self.assertRaises(LinkerError) as e:
|
229
|
-
|
230
|
+
|
231
|
+
@cuda.jit("void(int32[::1])", link=["nonexistent.a"])
|
230
232
|
def f(x):
|
231
233
|
x[0] = 0
|
232
|
-
|
234
|
+
|
235
|
+
self.assertIn("nonexistent.a not found", e.exception.args)
|
233
236
|
|
234
237
|
def test_set_registers_no_max(self):
|
235
238
|
"""Ensure that the jitted kernel used in the test_set_registers_* tests
|
@@ -276,7 +279,8 @@ class TestLinker(CUDATestCase):
|
|
276
279
|
def test_get_shared_mem_per_specialized(self):
|
277
280
|
compiled = cuda.jit(simple_smem)
|
278
281
|
compiled_specialized = compiled.specialize(
|
279
|
-
np.zeros(100, dtype=np.int32), np.float64
|
282
|
+
np.zeros(100, dtype=np.int32), np.float64
|
283
|
+
)
|
280
284
|
shared_mem_size = compiled_specialized.get_shared_mem_per_block()
|
281
285
|
self.assertEqual(shared_mem_size, 800)
|
282
286
|
|
@@ -307,11 +311,12 @@ class TestLinker(CUDATestCase):
|
|
307
311
|
compiled_specialized = compiled.specialize(
|
308
312
|
np.zeros(LMEM_SIZE, dtype=np.int32),
|
309
313
|
np.zeros(LMEM_SIZE, dtype=np.int32),
|
310
|
-
np.float64
|
314
|
+
np.float64,
|
315
|
+
)
|
311
316
|
local_mem_size = compiled_specialized.get_local_mem_per_thread()
|
312
317
|
calc_size = np.dtype(np.float64).itemsize * LMEM_SIZE
|
313
318
|
self.assertGreaterEqual(local_mem_size, calc_size)
|
314
319
|
|
315
320
|
|
316
|
-
if __name__ ==
|
321
|
+
if __name__ == "__main__":
|
317
322
|
unittest.main()
|
@@ -7,11 +7,10 @@ from numba.cuda.testing import skip_on_cudasim, skip_on_arm
|
|
7
7
|
from numba.tests.support import linux_only
|
8
8
|
|
9
9
|
|
10
|
-
@skip_on_cudasim(
|
10
|
+
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
|
11
11
|
@linux_only
|
12
|
-
@skip_on_arm(
|
12
|
+
@skip_on_arm("Managed Alloc support is experimental/untested on ARM")
|
13
13
|
class TestManagedAlloc(ContextResettingTestCase):
|
14
|
-
|
15
14
|
def get_total_gpu_memory(self):
|
16
15
|
# We use a driver function to directly get the total GPU memory because
|
17
16
|
# an EMM plugin may report something different (or not implement
|
@@ -48,7 +47,9 @@ class TestManagedAlloc(ContextResettingTestCase):
|
|
48
47
|
def test_managed_alloc_driver_undersubscribe(self):
|
49
48
|
msg = "Managed memory unsupported prior to CC 3.0"
|
50
49
|
self.skip_if_cc_major_lt(3, msg)
|
51
|
-
|
50
|
+
# We keep the allocation small so that it doesn't hang on GPUs
|
51
|
+
# with large memory (H100)
|
52
|
+
self._test_managed_alloc_driver(0.1)
|
52
53
|
|
53
54
|
# This test is skipped by default because it is easy to hang the machine
|
54
55
|
# for a very long time or get OOM killed if the GPU memory size is >50% of
|
@@ -85,7 +86,7 @@ class TestManagedAlloc(ContextResettingTestCase):
|
|
85
86
|
n_elems = n_bytes // dtype.itemsize
|
86
87
|
ary = np.ndarray(shape=n_elems, dtype=dtype, buffer=mem)
|
87
88
|
|
88
|
-
magic =
|
89
|
+
magic = 0xAB
|
89
90
|
device_memset(mem, magic, n_bytes)
|
90
91
|
ctx.synchronize()
|
91
92
|
|
@@ -102,7 +103,7 @@ class TestManagedAlloc(ContextResettingTestCase):
|
|
102
103
|
ary.fill(123.456)
|
103
104
|
self.assertTrue(all(ary == 123.456))
|
104
105
|
|
105
|
-
@cuda.jit(
|
106
|
+
@cuda.jit("void(double[:])")
|
106
107
|
def kernel(x):
|
107
108
|
i = cuda.grid(1)
|
108
109
|
if i < x.shape[0]:
|
@@ -123,5 +124,5 @@ class TestManagedAlloc(ContextResettingTestCase):
|
|
123
124
|
self._test_managed_array(attach_global=False)
|
124
125
|
|
125
126
|
|
126
|
-
if __name__ ==
|
127
|
+
if __name__ == "__main__":
|
127
128
|
unittest.main()
|