numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -7,9 +7,13 @@ import numpy as np
|
|
7
7
|
|
8
8
|
from numba import cuda
|
9
9
|
from numba.cuda.cudadrv import driver
|
10
|
-
from numba.cuda.testing import (
|
11
|
-
|
12
|
-
|
10
|
+
from numba.cuda.testing import (
|
11
|
+
skip_on_arm,
|
12
|
+
skip_on_cudasim,
|
13
|
+
skip_under_cuda_memcheck,
|
14
|
+
ContextResettingTestCase,
|
15
|
+
ForeignArray,
|
16
|
+
)
|
13
17
|
from numba.tests.support import linux_only, windows_only
|
14
18
|
import unittest
|
15
19
|
|
@@ -32,8 +36,9 @@ def core_ipc_handle_test(the_work, result_queue):
|
|
32
36
|
def base_ipc_handle_test(handle, size, result_queue):
|
33
37
|
def the_work():
|
34
38
|
dtype = np.dtype(np.intp)
|
35
|
-
with cuda.open_ipc_array(
|
36
|
-
|
39
|
+
with cuda.open_ipc_array(
|
40
|
+
handle, shape=size // dtype.itemsize, dtype=dtype
|
41
|
+
) as darr:
|
37
42
|
# copy the data to host
|
38
43
|
return darr.copy_to_host()
|
39
44
|
|
@@ -43,9 +48,11 @@ def base_ipc_handle_test(handle, size, result_queue):
|
|
43
48
|
def serialize_ipc_handle_test(handle, result_queue):
|
44
49
|
def the_work():
|
45
50
|
dtype = np.dtype(np.intp)
|
46
|
-
darr = handle.open_array(
|
47
|
-
|
48
|
-
|
51
|
+
darr = handle.open_array(
|
52
|
+
cuda.current_context(),
|
53
|
+
shape=handle.size // dtype.itemsize,
|
54
|
+
dtype=dtype,
|
55
|
+
)
|
49
56
|
# copy the data to host
|
50
57
|
arr = darr.copy_to_host()
|
51
58
|
handle.close()
|
@@ -63,10 +70,10 @@ def ipc_array_test(ipcarr, result_queue):
|
|
63
70
|
with ipcarr:
|
64
71
|
pass
|
65
72
|
except ValueError as e:
|
66
|
-
if str(e) !=
|
67
|
-
raise AssertionError(
|
73
|
+
if str(e) != "IpcHandle is already opened":
|
74
|
+
raise AssertionError("invalid exception message")
|
68
75
|
else:
|
69
|
-
raise AssertionError(
|
76
|
+
raise AssertionError("did not raise on reopen")
|
70
77
|
# Catch any exception so we can propagate it
|
71
78
|
except: # noqa: E722
|
72
79
|
# FAILED. propagate the exception as a string
|
@@ -80,11 +87,10 @@ def ipc_array_test(ipcarr, result_queue):
|
|
80
87
|
|
81
88
|
|
82
89
|
@linux_only
|
83
|
-
@skip_under_cuda_memcheck(
|
84
|
-
@skip_on_cudasim(
|
85
|
-
@skip_on_arm(
|
90
|
+
@skip_under_cuda_memcheck("Hangs cuda-memcheck")
|
91
|
+
@skip_on_cudasim("Ipc not available in CUDASIM")
|
92
|
+
@skip_on_arm("CUDA IPC not supported on ARM in Numba")
|
86
93
|
class TestIpcMemory(ContextResettingTestCase):
|
87
|
-
|
88
94
|
def test_ipc_handle(self):
|
89
95
|
# prepare data for IPC
|
90
96
|
arr = np.arange(10, dtype=np.intp)
|
@@ -102,7 +108,7 @@ class TestIpcMemory(ContextResettingTestCase):
|
|
102
108
|
size = ipch.size
|
103
109
|
|
104
110
|
# spawn new process for testing
|
105
|
-
ctx = mp.get_context(
|
111
|
+
ctx = mp.get_context("spawn")
|
106
112
|
result_queue = ctx.Queue()
|
107
113
|
args = (handle_bytes, size, result_queue)
|
108
114
|
proc = ctx.Process(target=base_ipc_handle_test, args=args)
|
@@ -145,11 +151,12 @@ class TestIpcMemory(ContextResettingTestCase):
|
|
145
151
|
if driver.USE_NV_BINDING:
|
146
152
|
self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
|
147
153
|
else:
|
148
|
-
self.assertEqual(
|
149
|
-
|
154
|
+
self.assertEqual(
|
155
|
+
ipch_recon.handle.reserved[:], ipch.handle.reserved[:]
|
156
|
+
)
|
150
157
|
|
151
158
|
# spawn new process for testing
|
152
|
-
ctx = mp.get_context(
|
159
|
+
ctx = mp.get_context("spawn")
|
153
160
|
result_queue = ctx.Queue()
|
154
161
|
args = (ipch, result_queue)
|
155
162
|
proc = ctx.Process(target=serialize_ipc_handle_test, args=args)
|
@@ -162,7 +169,10 @@ class TestIpcMemory(ContextResettingTestCase):
|
|
162
169
|
proc.join(3)
|
163
170
|
|
164
171
|
def test_ipc_handle_serialization(self):
|
165
|
-
for
|
172
|
+
for (
|
173
|
+
index,
|
174
|
+
foreign,
|
175
|
+
) in self.variants():
|
166
176
|
with self.subTest(index=index, foreign=foreign):
|
167
177
|
self.check_ipc_handle_serialization(index, foreign)
|
168
178
|
|
@@ -179,7 +189,7 @@ class TestIpcMemory(ContextResettingTestCase):
|
|
179
189
|
ipch = devarr.get_ipc_handle()
|
180
190
|
|
181
191
|
# spawn new process for testing
|
182
|
-
ctx = mp.get_context(
|
192
|
+
ctx = mp.get_context("spawn")
|
183
193
|
result_queue = ctx.Queue()
|
184
194
|
args = (ipch, result_queue)
|
185
195
|
proc = ctx.Process(target=ipc_array_test, args=args)
|
@@ -192,7 +202,10 @@ class TestIpcMemory(ContextResettingTestCase):
|
|
192
202
|
proc.join(3)
|
193
203
|
|
194
204
|
def test_ipc_array(self):
|
195
|
-
for
|
205
|
+
for (
|
206
|
+
index,
|
207
|
+
foreign,
|
208
|
+
) in self.variants():
|
196
209
|
with self.subTest(index=index, foreign=foreign):
|
197
210
|
self.check_ipc_array(index, foreign)
|
198
211
|
|
@@ -205,7 +218,9 @@ def staged_ipc_handle_test(handle, device_num, result_queue):
|
|
205
218
|
arrsize = handle.size // np.dtype(np.intp).itemsize
|
206
219
|
hostarray = np.zeros(arrsize, dtype=np.intp)
|
207
220
|
cuda.driver.device_to_host(
|
208
|
-
hostarray,
|
221
|
+
hostarray,
|
222
|
+
deviceptr,
|
223
|
+
size=handle.size,
|
209
224
|
)
|
210
225
|
handle.close()
|
211
226
|
return hostarray
|
@@ -223,10 +238,10 @@ def staged_ipc_array_test(ipcarr, device_num, result_queue):
|
|
223
238
|
with ipcarr:
|
224
239
|
pass
|
225
240
|
except ValueError as e:
|
226
|
-
if str(e) !=
|
227
|
-
raise AssertionError(
|
241
|
+
if str(e) != "IpcHandle is already opened":
|
242
|
+
raise AssertionError("invalid exception message")
|
228
243
|
else:
|
229
|
-
raise AssertionError(
|
244
|
+
raise AssertionError("did not raise on reopen")
|
230
245
|
# Catch any exception so we can propagate it
|
231
246
|
except: # noqa: E722
|
232
247
|
# FAILED. propagate the exception as a string
|
@@ -240,9 +255,9 @@ def staged_ipc_array_test(ipcarr, device_num, result_queue):
|
|
240
255
|
|
241
256
|
|
242
257
|
@linux_only
|
243
|
-
@skip_under_cuda_memcheck(
|
244
|
-
@skip_on_cudasim(
|
245
|
-
@skip_on_arm(
|
258
|
+
@skip_under_cuda_memcheck("Hangs cuda-memcheck")
|
259
|
+
@skip_on_cudasim("Ipc not available in CUDASIM")
|
260
|
+
@skip_on_arm("CUDA IPC not supported on ARM in Numba")
|
246
261
|
class TestIpcStaged(ContextResettingTestCase):
|
247
262
|
def test_staged(self):
|
248
263
|
# prepare data for IPC
|
@@ -250,7 +265,7 @@ class TestIpcStaged(ContextResettingTestCase):
|
|
250
265
|
devarr = cuda.to_device(arr)
|
251
266
|
|
252
267
|
# spawn new process for testing
|
253
|
-
mpctx = mp.get_context(
|
268
|
+
mpctx = mp.get_context("spawn")
|
254
269
|
result_queue = mpctx.Queue()
|
255
270
|
|
256
271
|
# create IPC handle
|
@@ -264,8 +279,7 @@ class TestIpcStaged(ContextResettingTestCase):
|
|
264
279
|
self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
|
265
280
|
else:
|
266
281
|
self.assertEqual(
|
267
|
-
ipch_recon.handle.reserved[:],
|
268
|
-
ipch.handle.reserved[:]
|
282
|
+
ipch_recon.handle.reserved[:], ipch.handle.reserved[:]
|
269
283
|
)
|
270
284
|
self.assertEqual(ipch_recon.size, ipch.size)
|
271
285
|
|
@@ -289,7 +303,7 @@ class TestIpcStaged(ContextResettingTestCase):
|
|
289
303
|
ipch = devarr.get_ipc_handle()
|
290
304
|
|
291
305
|
# spawn new process for testing
|
292
|
-
ctx = mp.get_context(
|
306
|
+
ctx = mp.get_context("spawn")
|
293
307
|
result_queue = ctx.Queue()
|
294
308
|
args = (ipch, device_num, result_queue)
|
295
309
|
proc = ctx.Process(target=staged_ipc_array_test, args=args)
|
@@ -303,7 +317,7 @@ class TestIpcStaged(ContextResettingTestCase):
|
|
303
317
|
|
304
318
|
|
305
319
|
@windows_only
|
306
|
-
@skip_on_cudasim(
|
320
|
+
@skip_on_cudasim("Ipc not available in CUDASIM")
|
307
321
|
class TestIpcNotSupported(ContextResettingTestCase):
|
308
322
|
def test_unsupported(self):
|
309
323
|
arr = np.arange(10, dtype=np.intp)
|
@@ -311,8 +325,8 @@ class TestIpcNotSupported(ContextResettingTestCase):
|
|
311
325
|
with self.assertRaises(OSError) as raises:
|
312
326
|
devarr.get_ipc_handle()
|
313
327
|
errmsg = str(raises.exception)
|
314
|
-
self.assertIn(
|
328
|
+
self.assertIn("OS does not support CUDA IPC", errmsg)
|
315
329
|
|
316
330
|
|
317
|
-
if __name__ ==
|
331
|
+
if __name__ == "__main__":
|
318
332
|
unittest.main()
|
@@ -5,7 +5,6 @@ import numpy as np
|
|
5
5
|
|
6
6
|
|
7
7
|
class TestIterators(CUDATestCase):
|
8
|
-
|
9
8
|
def test_enumerate(self):
|
10
9
|
@cuda.jit
|
11
10
|
def enumerator(x, error):
|
@@ -95,5 +94,5 @@ class TestIterators(CUDATestCase):
|
|
95
94
|
self._test_twoarg_function(zipper_enumerator)
|
96
95
|
|
97
96
|
|
98
|
-
if __name__ ==
|
97
|
+
if __name__ == "__main__":
|
99
98
|
unittest.main()
|
@@ -10,7 +10,7 @@ from numba.cuda.testing import unittest, CUDATestCase
|
|
10
10
|
|
11
11
|
class TestLang(CUDATestCase):
|
12
12
|
def test_enumerate(self):
|
13
|
-
tup = (1
|
13
|
+
tup = (1.0, 2.5, 3.0)
|
14
14
|
|
15
15
|
@cuda.jit("void(float64[:])")
|
16
16
|
def foo(a):
|
@@ -39,12 +39,12 @@ class TestLang(CUDATestCase):
|
|
39
39
|
self.assertTrue(np.all(a == (b + c).sum()))
|
40
40
|
|
41
41
|
def test_issue_872(self):
|
42
|
-
|
42
|
+
"""
|
43
43
|
Ensure that typing and lowering of CUDA kernel API primitives works in
|
44
44
|
more than one block. Was originally to ensure that macro expansion works
|
45
45
|
for more than one block (issue #872), but macro expansion has been
|
46
46
|
replaced by a "proper" implementation of all kernel API functions.
|
47
|
-
|
47
|
+
"""
|
48
48
|
|
49
49
|
@cuda.jit("void(float64[:,:])")
|
50
50
|
def cuda_kernel_api_in_multiple_blocks(ary):
|
@@ -60,5 +60,5 @@ class TestLang(CUDATestCase):
|
|
60
60
|
cuda_kernel_api_in_multiple_blocks[1, (2, 3)](a)
|
61
61
|
|
62
62
|
|
63
|
-
if __name__ ==
|
63
|
+
if __name__ == "__main__":
|
64
64
|
unittest.main()
|
@@ -14,7 +14,6 @@ SM_SIZE = tpb, tpb
|
|
14
14
|
|
15
15
|
class TestCudaLaplace(CUDATestCase):
|
16
16
|
def test_laplace_small(self):
|
17
|
-
|
18
17
|
@cuda.jit(float64(float64, float64), device=True, inline=True)
|
19
18
|
def get_max(a, b):
|
20
19
|
if a > b:
|
@@ -38,8 +37,9 @@ class TestCudaLaplace(CUDATestCase):
|
|
38
37
|
|
39
38
|
err_sm[ty, tx] = 0
|
40
39
|
if j >= 1 and j < n - 1 and i >= 1 and i < m - 1:
|
41
|
-
Anew[j, i] = 0.25 * (
|
42
|
-
|
40
|
+
Anew[j, i] = 0.25 * (
|
41
|
+
A[j, i + 1] + A[j, i - 1] + A[j - 1, i] + A[j + 1, i]
|
42
|
+
)
|
43
43
|
err_sm[ty, tx] = Anew[j, i] - A[j, i]
|
44
44
|
|
45
45
|
cuda.syncthreads()
|
@@ -91,8 +91,8 @@ class TestCudaLaplace(CUDATestCase):
|
|
91
91
|
|
92
92
|
stream = cuda.stream()
|
93
93
|
|
94
|
-
dA = cuda.to_device(A, stream)
|
95
|
-
dAnew = cuda.to_device(Anew, stream)
|
94
|
+
dA = cuda.to_device(A, stream) # to device and don't come back
|
95
|
+
dAnew = cuda.to_device(Anew, stream) # to device and don't come back
|
96
96
|
derror_grid = cuda.to_device(error_grid, stream)
|
97
97
|
|
98
98
|
while error > tol and iter < iter_max:
|
@@ -115,5 +115,5 @@ class TestCudaLaplace(CUDATestCase):
|
|
115
115
|
iter += 1
|
116
116
|
|
117
117
|
|
118
|
-
if __name__ ==
|
118
|
+
if __name__ == "__main__":
|
119
119
|
unittest.main()
|
@@ -31,7 +31,7 @@ def use_sad(r, x, y, z):
|
|
31
31
|
r[i] = libdevice.sad(x[i], y[i], z[i])
|
32
32
|
|
33
33
|
|
34
|
-
@skip_on_cudasim(
|
34
|
+
@skip_on_cudasim("Libdevice functions are not supported on cudasim")
|
35
35
|
class TestLibdevice(CUDATestCase):
|
36
36
|
"""
|
37
37
|
Some tests of libdevice function wrappers that check the returned values.
|
@@ -102,14 +102,15 @@ def make_test_call(libname):
|
|
102
102
|
def _test_call_functions(self):
|
103
103
|
# Strip off '__nv_' from libdevice name to get Python name
|
104
104
|
apiname = libname[5:]
|
105
|
-
apifunc = getattr(libdevice, apiname)
|
105
|
+
apifunc = getattr(libdevice, apiname) # noqa: F841
|
106
106
|
retty, args = functions[libname]
|
107
107
|
sig = create_signature(retty, args)
|
108
108
|
|
109
109
|
# Construct arguments to the libdevice function. These are all
|
110
110
|
# non-pointer arguments to the underlying bitcode function.
|
111
|
-
funcargs = ", ".join(
|
112
|
-
|
111
|
+
funcargs = ", ".join(
|
112
|
+
["a%d" % i for i, arg in enumerate(args) if not arg.is_ptr]
|
113
|
+
)
|
113
114
|
|
114
115
|
# Arguments to the Python function (`pyfunc` in the template above) are
|
115
116
|
# the arguments to the libdevice function, plus as many extra arguments
|
@@ -118,35 +119,37 @@ def make_test_call(libname):
|
|
118
119
|
# returns.
|
119
120
|
if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
|
120
121
|
# Start with the parameters for the return values
|
121
|
-
pyargs = ", ".join([
|
122
|
-
range(len(sig.return_type))])
|
122
|
+
pyargs = ", ".join(["r%d" % i for i in range(len(sig.return_type))])
|
123
123
|
# Add the parameters for the argument values
|
124
124
|
pyargs += ", " + funcargs
|
125
125
|
# Generate the unpacking of the return value from the libdevice
|
126
126
|
# function into the Python function return values (`r0`, `r1`,
|
127
127
|
# etc.).
|
128
|
-
retvars = ", ".join(
|
129
|
-
|
128
|
+
retvars = ", ".join(
|
129
|
+
["r%d[0]" % i for i in range(len(sig.return_type))]
|
130
|
+
)
|
130
131
|
else:
|
131
132
|
# Scalar return is a more straightforward case
|
132
133
|
pyargs = "r0, " + funcargs
|
133
134
|
retvars = "r0[0]"
|
134
135
|
|
135
136
|
# Create the string containing the function to compile
|
136
|
-
d = {
|
137
|
-
|
138
|
-
|
139
|
-
|
137
|
+
d = {
|
138
|
+
"func": apiname,
|
139
|
+
"pyargs": pyargs,
|
140
|
+
"funcargs": funcargs,
|
141
|
+
"retvars": retvars,
|
142
|
+
}
|
140
143
|
code = function_template % d
|
141
144
|
|
142
145
|
# Convert the string to a Python function
|
143
146
|
locals = {}
|
144
147
|
exec(code, globals(), locals)
|
145
|
-
pyfunc = locals[
|
148
|
+
pyfunc = locals["pyfunc"]
|
146
149
|
|
147
150
|
# Compute the signature for compilation. This mirrors the creation of
|
148
151
|
# arguments to the Python function above.
|
149
|
-
pyargs = [
|
152
|
+
pyargs = [arg.ty for arg in args if not arg.is_ptr]
|
150
153
|
if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
|
151
154
|
pyreturns = [ret[::1] for ret in sig.return_type]
|
152
155
|
pyargs = pyreturns + pyargs
|
@@ -159,16 +162,16 @@ def make_test_call(libname):
|
|
159
162
|
# If the function body was discarded by optimization (therefore making
|
160
163
|
# the test a bit weak), there won't be any loading of parameters -
|
161
164
|
# ensure that a load from parameters occurs somewhere in the PTX
|
162
|
-
self.assertIn(
|
165
|
+
self.assertIn("ld.param", ptx)
|
163
166
|
|
164
167
|
# Returning the result (through a passed-in array) should also require
|
165
168
|
# a store to global memory, so check for at least one of those too.
|
166
|
-
self.assertIn(
|
169
|
+
self.assertIn("st.global", ptx)
|
167
170
|
|
168
171
|
return _test_call_functions
|
169
172
|
|
170
173
|
|
171
|
-
@skip_on_cudasim(
|
174
|
+
@skip_on_cudasim("Compilation to PTX is not supported on cudasim")
|
172
175
|
class TestLibdeviceCompilation(unittest.TestCase):
|
173
176
|
"""
|
174
177
|
Class for holding all tests of compiling calls to libdevice functions. We
|
@@ -179,9 +182,10 @@ class TestLibdeviceCompilation(unittest.TestCase):
|
|
179
182
|
|
180
183
|
|
181
184
|
for libname in functions:
|
182
|
-
setattr(
|
183
|
-
|
185
|
+
setattr(
|
186
|
+
TestLibdeviceCompilation, "test_%s" % libname, make_test_call(libname)
|
187
|
+
)
|
184
188
|
|
185
189
|
|
186
|
-
if __name__ ==
|
190
|
+
if __name__ == "__main__":
|
187
191
|
unittest.main()
|
@@ -7,16 +7,16 @@ import unittest
|
|
7
7
|
import warnings
|
8
8
|
|
9
9
|
|
10
|
-
@skip_on_cudasim(
|
10
|
+
@skip_on_cudasim("Simulator does not produce lineinfo")
|
11
11
|
class TestCudaLineInfo(CUDATestCase):
|
12
12
|
def _loc_directive_regex(self):
|
13
13
|
# This is used in several tests
|
14
14
|
|
15
15
|
pat = (
|
16
|
-
r
|
17
|
-
r
|
18
|
-
r
|
19
|
-
r
|
16
|
+
r"\.loc" # .loc directive beginning
|
17
|
+
r"\s+[0-9]+" # whitespace then file index
|
18
|
+
r"\s+[0-9]+" # whitespace then line number
|
19
|
+
r"\s+[0-9]+" # whitespace then column position
|
20
20
|
)
|
21
21
|
return re.compile(pat)
|
22
22
|
|
@@ -29,21 +29,21 @@ class TestCudaLineInfo(CUDATestCase):
|
|
29
29
|
# DICompileUnit debug info metadata should all be of the
|
30
30
|
# DebugDirectivesOnly kind, and not the FullDebug kind
|
31
31
|
pat = (
|
32
|
-
r
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
r
|
37
|
-
|
38
|
-
r
|
32
|
+
r"!DICompileUnit\(.*" # Opening of DICompileUnit metadata. Since
|
33
|
+
# the order of attributes is not
|
34
|
+
# guaranteed, we need to match arbitrarily
|
35
|
+
# afterwards.
|
36
|
+
r"emissionKind:\s+" # The emissionKind attribute followed by
|
37
|
+
# whitespace.
|
38
|
+
r"DebugDirectivesOnly" # The correct emissionKind.
|
39
39
|
)
|
40
40
|
match = re.compile(pat).search(llvm)
|
41
41
|
assertfn(match, msg=ptx)
|
42
42
|
|
43
43
|
pat = (
|
44
|
-
r
|
45
|
-
r
|
46
|
-
r
|
44
|
+
r"!DICompileUnit\(.*" # Same as the pattern above, but for the
|
45
|
+
r"emissionKind:\s+" # incorrect FullDebug emissionKind.
|
46
|
+
r"FullDebug" #
|
47
47
|
)
|
48
48
|
match = re.compile(pat).search(llvm)
|
49
49
|
self.assertIsNone(match, msg=ptx)
|
@@ -51,8 +51,8 @@ class TestCudaLineInfo(CUDATestCase):
|
|
51
51
|
# The name of this file should be present in the line mapping
|
52
52
|
# if lineinfo was propagated through correctly.
|
53
53
|
pat = (
|
54
|
-
r
|
55
|
-
r
|
54
|
+
r"\.file" # .file directive beginning
|
55
|
+
r"\s+[0-9]+\s+" # file number surrounded by whitespace
|
56
56
|
r'".*test_lineinfo.py"' # filename in quotes, ignoring full path
|
57
57
|
)
|
58
58
|
match = re.compile(pat).search(ptx)
|
@@ -65,8 +65,8 @@ class TestCudaLineInfo(CUDATestCase):
|
|
65
65
|
# Debug info sections should not be present when only lineinfo is
|
66
66
|
# generated
|
67
67
|
pat = (
|
68
|
-
r
|
69
|
-
r
|
68
|
+
r"\.section\s+" # .section directive beginning
|
69
|
+
r"\.debug_info" # Section named ".debug_info"
|
70
70
|
)
|
71
71
|
match = re.compile(pat).search(ptx)
|
72
72
|
self.assertIsNone(match, msg=ptx)
|
@@ -98,7 +98,7 @@ class TestCudaLineInfo(CUDATestCase):
|
|
98
98
|
# signal an exception (e.g. divide by zero) has occurred. When the
|
99
99
|
# error model is the default NumPy one (as it should be when only
|
100
100
|
# lineinfo is enabled) the device function always returns 0.
|
101
|
-
self.assertNotIn(
|
101
|
+
self.assertNotIn("ret i32 1", llvm)
|
102
102
|
|
103
103
|
def test_no_lineinfo_in_device_function(self):
|
104
104
|
# Ensure that no lineinfo is generated in device functions by default.
|
@@ -138,7 +138,7 @@ class TestCudaLineInfo(CUDATestCase):
|
|
138
138
|
# Check that there is no device function in the PTX
|
139
139
|
|
140
140
|
# A line beginning with ".weak .func" that identifies a device function
|
141
|
-
devfn_start = re.compile(r
|
141
|
+
devfn_start = re.compile(r"^\.weak\s+\.func")
|
142
142
|
|
143
143
|
for line in ptxlines:
|
144
144
|
if devfn_start.match(line) is not None:
|
@@ -151,13 +151,14 @@ class TestCudaLineInfo(CUDATestCase):
|
|
151
151
|
|
152
152
|
for line in ptxlines:
|
153
153
|
if loc_directive.search(line) is not None:
|
154
|
-
if
|
154
|
+
if "inlined_at" in line:
|
155
155
|
found = True
|
156
156
|
break
|
157
157
|
|
158
158
|
if not found:
|
159
|
-
self.fail(
|
160
|
-
|
159
|
+
self.fail(
|
160
|
+
f"No .loc directive with inlined_at info foundin:\n\n{ptx}"
|
161
|
+
)
|
161
162
|
|
162
163
|
# We also inspect the LLVM to ensure that there's debug info for each
|
163
164
|
# subprogram (function). A lightweight way to check this is to ensure
|
@@ -166,7 +167,7 @@ class TestCudaLineInfo(CUDATestCase):
|
|
166
167
|
llvm = caller.inspect_llvm(sig)
|
167
168
|
subprograms = 0
|
168
169
|
for line in llvm.splitlines():
|
169
|
-
if
|
170
|
+
if "distinct !DISubprogram" in line:
|
170
171
|
subprograms += 1
|
171
172
|
|
172
173
|
# One DISubprogram for each of:
|
@@ -174,9 +175,12 @@ class TestCudaLineInfo(CUDATestCase):
|
|
174
175
|
# - The callee
|
175
176
|
expected_subprograms = 2
|
176
177
|
|
177
|
-
self.assertEqual(
|
178
|
-
|
179
|
-
|
178
|
+
self.assertEqual(
|
179
|
+
subprograms,
|
180
|
+
expected_subprograms,
|
181
|
+
f'"Expected {expected_subprograms} DISubprograms; '
|
182
|
+
f"got {subprograms}",
|
183
|
+
)
|
180
184
|
|
181
185
|
def test_debug_and_lineinfo_warning(self):
|
182
186
|
with warnings.catch_warnings(record=True) as w:
|
@@ -190,9 +194,10 @@ class TestCudaLineInfo(CUDATestCase):
|
|
190
194
|
|
191
195
|
self.assertEqual(len(w), 1)
|
192
196
|
self.assertEqual(w[0].category, NumbaInvalidConfigWarning)
|
193
|
-
self.assertIn(
|
194
|
-
|
197
|
+
self.assertIn(
|
198
|
+
"debug and lineinfo are mutually exclusive", str(w[0].message)
|
199
|
+
)
|
195
200
|
|
196
201
|
|
197
|
-
if __name__ ==
|
202
|
+
if __name__ == "__main__":
|
198
203
|
unittest.main()
|
@@ -31,32 +31,31 @@ def culocal1tuple(A, B):
|
|
31
31
|
B[i] = C[i]
|
32
32
|
|
33
33
|
|
34
|
-
@skip_on_cudasim(
|
34
|
+
@skip_on_cudasim("PTX inspection not available in cudasim")
|
35
35
|
class TestCudaLocalMem(CUDATestCase):
|
36
36
|
def test_local_array(self):
|
37
37
|
sig = (int32[:], int32[:])
|
38
38
|
jculocal = cuda.jit(sig)(culocal)
|
39
|
-
self.assertTrue(
|
40
|
-
A = np.arange(1000, dtype=
|
39
|
+
self.assertTrue(".local" in jculocal.inspect_asm(sig))
|
40
|
+
A = np.arange(1000, dtype="int32")
|
41
41
|
B = np.zeros_like(A)
|
42
42
|
jculocal[1, 1](A, B)
|
43
43
|
self.assertTrue(np.all(A == B))
|
44
44
|
|
45
45
|
def test_local_array_1_tuple(self):
|
46
|
-
"""Ensure that local arrays can be constructed with 1-tuple shape
|
47
|
-
""
|
48
|
-
jculocal = cuda.jit('void(int32[:], int32[:])')(culocal1tuple)
|
46
|
+
"""Ensure that local arrays can be constructed with 1-tuple shape"""
|
47
|
+
jculocal = cuda.jit("void(int32[:], int32[:])")(culocal1tuple)
|
49
48
|
# Don't check if .local is in the ptx because the optimizer
|
50
49
|
# may reduce it to registers.
|
51
|
-
A = np.arange(5, dtype=
|
50
|
+
A = np.arange(5, dtype="int32")
|
52
51
|
B = np.zeros_like(A)
|
53
52
|
jculocal[1, 1](A, B)
|
54
53
|
self.assertTrue(np.all(A == B))
|
55
54
|
|
56
55
|
def test_local_array_complex(self):
|
57
|
-
sig =
|
56
|
+
sig = "void(complex128[:], complex128[:])"
|
58
57
|
jculocalcomplex = cuda.jit(sig)(culocalcomplex)
|
59
|
-
A = (np.arange(100, dtype=
|
58
|
+
A = (np.arange(100, dtype="complex128") - 1) / 2j
|
60
59
|
B = np.zeros_like(A)
|
61
60
|
jculocalcomplex[1, 1](A, B)
|
62
61
|
self.assertTrue(np.all(A == B))
|
@@ -64,7 +63,7 @@ class TestCudaLocalMem(CUDATestCase):
|
|
64
63
|
def check_dtype(self, f, dtype):
|
65
64
|
# Find the typing of the dtype argument to cuda.local.array
|
66
65
|
annotation = next(iter(f.overloads.values()))._type_annotation
|
67
|
-
l_dtype = annotation.typemap[
|
66
|
+
l_dtype = annotation.typemap["l"].dtype
|
68
67
|
# Ensure that the typing is correct
|
69
68
|
self.assertEqual(l_dtype, dtype)
|
70
69
|
|
@@ -95,7 +94,7 @@ class TestCudaLocalMem(CUDATestCase):
|
|
95
94
|
# Check that strings can be used to specify the dtype of a local array
|
96
95
|
@cuda.jit(void(int32[::1]))
|
97
96
|
def f(x):
|
98
|
-
l = cuda.local.array(10, dtype=
|
97
|
+
l = cuda.local.array(10, dtype="int32")
|
99
98
|
l[0] = x[0]
|
100
99
|
x[0] = l[0]
|
101
100
|
|
@@ -106,9 +105,10 @@ class TestCudaLocalMem(CUDATestCase):
|
|
106
105
|
# Check that strings of invalid dtypes cause a typing error
|
107
106
|
re = ".*Invalid NumPy dtype specified: 'int33'.*"
|
108
107
|
with self.assertRaisesRegex(TypingError, re):
|
108
|
+
|
109
109
|
@cuda.jit(void(int32[::1]))
|
110
110
|
def f(x):
|
111
|
-
l = cuda.local.array(10, dtype=
|
111
|
+
l = cuda.local.array(10, dtype="int33")
|
112
112
|
l[0] = x[0]
|
113
113
|
x[0] = l[0]
|
114
114
|
|
@@ -160,5 +160,5 @@ class TestCudaLocalMem(CUDATestCase):
|
|
160
160
|
self._check_local_array_size_fp16(2, 2, np.float16)
|
161
161
|
|
162
162
|
|
163
|
-
if __name__ ==
|
163
|
+
if __name__ == "__main__":
|
164
164
|
unittest.main()
|
@@ -3,11 +3,10 @@ from numba.cuda.compiler import compile_ptx
|
|
3
3
|
from numba.cuda.testing import skip_on_cudasim, unittest
|
4
4
|
|
5
5
|
|
6
|
-
@skip_on_cudasim(
|
6
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
7
7
|
class TestCudaMandel(unittest.TestCase):
|
8
8
|
def test_mandel(self):
|
9
|
-
"""Just make sure we can compile this
|
10
|
-
"""
|
9
|
+
"""Just make sure we can compile this"""
|
11
10
|
|
12
11
|
def mandel(tid, min_x, max_x, min_y, max_y, width, height, iters):
|
13
12
|
pixel_size_x = (max_x - min_x) / width
|
@@ -28,10 +27,18 @@ class TestCudaMandel(unittest.TestCase):
|
|
28
27
|
return i
|
29
28
|
return iters
|
30
29
|
|
31
|
-
args = (
|
32
|
-
|
30
|
+
args = (
|
31
|
+
uint32,
|
32
|
+
float64,
|
33
|
+
float64,
|
34
|
+
float64,
|
35
|
+
float64,
|
36
|
+
uint32,
|
37
|
+
uint32,
|
38
|
+
uint32,
|
39
|
+
)
|
33
40
|
compile_ptx(mandel, args, device=True)
|
34
41
|
|
35
42
|
|
36
|
-
if __name__ ==
|
43
|
+
if __name__ == "__main__":
|
37
44
|
unittest.main()
|