numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -9,12 +9,13 @@ from numba.tests.support import linux_only, override_config
|
|
9
9
|
from unittest.mock import call, patch
|
10
10
|
|
11
11
|
|
12
|
-
@skip_on_cudasim(
|
12
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
13
13
|
class TestCudaArrayInterface(ContextResettingTestCase):
|
14
14
|
def assertPointersEqual(self, a, b):
|
15
15
|
if driver.USE_NV_BINDING:
|
16
|
-
self.assertEqual(
|
17
|
-
|
16
|
+
self.assertEqual(
|
17
|
+
int(a.device_ctypes_pointer), int(b.device_ctypes_pointer)
|
18
|
+
)
|
18
19
|
|
19
20
|
def test_as_cuda_array(self):
|
20
21
|
h_arr = np.arange(10)
|
@@ -37,7 +38,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
37
38
|
else:
|
38
39
|
return stream.handle.value
|
39
40
|
|
40
|
-
@skip_if_external_memmgr(
|
41
|
+
@skip_if_external_memmgr("Ownership not relevant with external memmgr")
|
41
42
|
def test_ownership(self):
|
42
43
|
# Get the deallocation queue
|
43
44
|
ctx = cuda.current_context()
|
@@ -82,7 +83,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
82
83
|
np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val)
|
83
84
|
|
84
85
|
def test_ufunc_arg(self):
|
85
|
-
@vectorize([
|
86
|
+
@vectorize(["f8(f8, f8)"], target="cuda")
|
86
87
|
def vadd(a, b):
|
87
88
|
return a + b
|
88
89
|
|
@@ -99,7 +100,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
99
100
|
np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val)
|
100
101
|
|
101
102
|
def test_gufunc_arg(self):
|
102
|
-
@guvectorize([
|
103
|
+
@guvectorize(["(f8, f8, f8[:])"], "(),()->()", target="cuda")
|
103
104
|
def vadd(inp, val, out):
|
104
105
|
out[0] = inp + val
|
105
106
|
|
@@ -118,8 +119,8 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
118
119
|
|
119
120
|
def test_array_views(self):
|
120
121
|
"""Views created via array interface support:
|
121
|
-
|
122
|
-
|
122
|
+
- Strided slices
|
123
|
+
- Strided slices
|
123
124
|
"""
|
124
125
|
h_arr = np.random.random(10)
|
125
126
|
c_arr = cuda.to_device(h_arr)
|
@@ -148,23 +149,22 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
148
149
|
self.assertEqual(arr[::2].strides, arr_strided.strides)
|
149
150
|
self.assertEqual(arr[::2].dtype.itemsize, arr_strided.dtype.itemsize)
|
150
151
|
self.assertEqual(arr[::2].alloc_size, arr_strided.alloc_size)
|
151
|
-
self.assertEqual(
|
152
|
-
|
152
|
+
self.assertEqual(
|
153
|
+
arr[::2].nbytes, arr_strided.size * arr_strided.dtype.itemsize
|
154
|
+
)
|
153
155
|
|
154
156
|
# __setitem__ interface propagates into external array
|
155
157
|
|
156
158
|
# Writes to a slice
|
157
159
|
arr[:5] = np.pi
|
158
160
|
np.testing.assert_array_equal(
|
159
|
-
c_arr.copy_to_host(),
|
160
|
-
np.concatenate((np.full(5, np.pi), h_arr[5:]))
|
161
|
+
c_arr.copy_to_host(), np.concatenate((np.full(5, np.pi), h_arr[5:]))
|
161
162
|
)
|
162
163
|
|
163
164
|
# Writes to a slice from a view
|
164
165
|
arr[:5] = arr[5:]
|
165
166
|
np.testing.assert_array_equal(
|
166
|
-
c_arr.copy_to_host(),
|
167
|
-
np.concatenate((h_arr[5:], h_arr[5:]))
|
167
|
+
c_arr.copy_to_host(), np.concatenate((h_arr[5:], h_arr[5:]))
|
168
168
|
)
|
169
169
|
|
170
170
|
# Writes through a view
|
@@ -177,10 +177,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
177
177
|
c_arr.copy_to_host()[::2],
|
178
178
|
np.full(5, np.pi),
|
179
179
|
)
|
180
|
-
np.testing.assert_array_equal(
|
181
|
-
c_arr.copy_to_host()[1::2],
|
182
|
-
h_arr[1::2]
|
183
|
-
)
|
180
|
+
np.testing.assert_array_equal(c_arr.copy_to_host()[1::2], h_arr[1::2])
|
184
181
|
|
185
182
|
def test_negative_strided_issue(self):
|
186
183
|
# issue #3705
|
@@ -188,7 +185,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
188
185
|
c_arr = cuda.to_device(h_arr)
|
189
186
|
|
190
187
|
def base_offset(orig, sliced):
|
191
|
-
return sliced[
|
188
|
+
return sliced["data"][0] - orig["data"][0]
|
192
189
|
|
193
190
|
h_ai = h_arr.__array_interface__
|
194
191
|
c_ai = c_arr.__cuda_array_interface__
|
@@ -202,8 +199,8 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
202
199
|
base_offset(c_ai, c_ai_sliced),
|
203
200
|
)
|
204
201
|
# Check shape and strides are correct
|
205
|
-
self.assertEqual(h_ai_sliced[
|
206
|
-
self.assertEqual(h_ai_sliced[
|
202
|
+
self.assertEqual(h_ai_sliced["shape"], c_ai_sliced["shape"])
|
203
|
+
self.assertEqual(h_ai_sliced["strides"], c_ai_sliced["strides"])
|
207
204
|
|
208
205
|
def test_negative_strided_copy_to_host(self):
|
209
206
|
# issue #3705
|
@@ -212,28 +209,28 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
212
209
|
sliced = c_arr[::-1]
|
213
210
|
with self.assertRaises(NotImplementedError) as raises:
|
214
211
|
sliced.copy_to_host()
|
215
|
-
expected_msg =
|
212
|
+
expected_msg = "D->H copy not implemented for negative strides"
|
216
213
|
self.assertIn(expected_msg, str(raises.exception))
|
217
214
|
|
218
215
|
def test_masked_array(self):
|
219
216
|
h_arr = np.random.random(10)
|
220
|
-
h_mask = np.random.randint(2, size=10, dtype=
|
217
|
+
h_mask = np.random.randint(2, size=10, dtype="bool")
|
221
218
|
c_arr = cuda.to_device(h_arr)
|
222
219
|
c_mask = cuda.to_device(h_mask)
|
223
220
|
|
224
221
|
# Manually create a masked CUDA Array Interface dictionary
|
225
222
|
masked_cuda_array_interface = c_arr.__cuda_array_interface__.copy()
|
226
|
-
masked_cuda_array_interface[
|
223
|
+
masked_cuda_array_interface["mask"] = c_mask
|
227
224
|
|
228
225
|
with self.assertRaises(NotImplementedError) as raises:
|
229
226
|
cuda.from_cuda_array_interface(masked_cuda_array_interface)
|
230
|
-
expected_msg =
|
227
|
+
expected_msg = "Masked arrays are not supported"
|
231
228
|
self.assertIn(expected_msg, str(raises.exception))
|
232
229
|
|
233
230
|
def test_zero_size_array(self):
|
234
231
|
# for #4175
|
235
232
|
c_arr = cuda.device_array(0)
|
236
|
-
self.assertEqual(c_arr.__cuda_array_interface__[
|
233
|
+
self.assertEqual(c_arr.__cuda_array_interface__["data"][0], 0)
|
237
234
|
|
238
235
|
@cuda.jit
|
239
236
|
def add_one(arr):
|
@@ -249,49 +246,49 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
249
246
|
# for #4175
|
250
247
|
# First, test C-contiguous array
|
251
248
|
c_arr = cuda.device_array((2, 3, 4))
|
252
|
-
self.assertEqual(c_arr.__cuda_array_interface__[
|
249
|
+
self.assertEqual(c_arr.__cuda_array_interface__["strides"], None)
|
253
250
|
|
254
251
|
# Second, test non C-contiguous array
|
255
252
|
c_arr = c_arr[:, 1, :]
|
256
|
-
self.assertNotEqual(c_arr.__cuda_array_interface__[
|
253
|
+
self.assertNotEqual(c_arr.__cuda_array_interface__["strides"], None)
|
257
254
|
|
258
255
|
def test_consuming_strides(self):
|
259
256
|
hostarray = np.arange(10).reshape(2, 5)
|
260
257
|
devarray = cuda.to_device(hostarray)
|
261
258
|
face = devarray.__cuda_array_interface__
|
262
|
-
self.assertIsNone(face[
|
259
|
+
self.assertIsNone(face["strides"])
|
263
260
|
got = cuda.from_cuda_array_interface(face).copy_to_host()
|
264
261
|
np.testing.assert_array_equal(got, hostarray)
|
265
|
-
self.assertTrue(got.flags[
|
262
|
+
self.assertTrue(got.flags["C_CONTIGUOUS"])
|
266
263
|
# Try non-NULL strides
|
267
|
-
face[
|
268
|
-
self.assertIsNotNone(face[
|
264
|
+
face["strides"] = hostarray.strides
|
265
|
+
self.assertIsNotNone(face["strides"])
|
269
266
|
got = cuda.from_cuda_array_interface(face).copy_to_host()
|
270
267
|
np.testing.assert_array_equal(got, hostarray)
|
271
|
-
self.assertTrue(got.flags[
|
268
|
+
self.assertTrue(got.flags["C_CONTIGUOUS"])
|
272
269
|
|
273
270
|
def test_produce_no_stream(self):
|
274
271
|
c_arr = cuda.device_array(10)
|
275
|
-
self.assertIsNone(c_arr.__cuda_array_interface__[
|
272
|
+
self.assertIsNone(c_arr.__cuda_array_interface__["stream"])
|
276
273
|
|
277
274
|
mapped_arr = cuda.mapped_array(10)
|
278
|
-
self.assertIsNone(mapped_arr.__cuda_array_interface__[
|
275
|
+
self.assertIsNone(mapped_arr.__cuda_array_interface__["stream"])
|
279
276
|
|
280
277
|
@linux_only
|
281
278
|
def test_produce_managed_no_stream(self):
|
282
279
|
managed_arr = cuda.managed_array(10)
|
283
|
-
self.assertIsNone(managed_arr.__cuda_array_interface__[
|
280
|
+
self.assertIsNone(managed_arr.__cuda_array_interface__["stream"])
|
284
281
|
|
285
282
|
def test_produce_stream(self):
|
286
283
|
s = cuda.stream()
|
287
284
|
c_arr = cuda.device_array(10, stream=s)
|
288
|
-
cai_stream = c_arr.__cuda_array_interface__[
|
285
|
+
cai_stream = c_arr.__cuda_array_interface__["stream"]
|
289
286
|
stream_value = self.get_stream_value(s)
|
290
287
|
self.assertEqual(stream_value, cai_stream)
|
291
288
|
|
292
289
|
s = cuda.stream()
|
293
290
|
mapped_arr = cuda.mapped_array(10, stream=s)
|
294
|
-
cai_stream = mapped_arr.__cuda_array_interface__[
|
291
|
+
cai_stream = mapped_arr.__cuda_array_interface__["stream"]
|
295
292
|
stream_value = self.get_stream_value(s)
|
296
293
|
self.assertEqual(stream_value, cai_stream)
|
297
294
|
|
@@ -299,7 +296,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
299
296
|
def test_produce_managed_stream(self):
|
300
297
|
s = cuda.stream()
|
301
298
|
managed_arr = cuda.managed_array(10, stream=s)
|
302
|
-
cai_stream = managed_arr.__cuda_array_interface__[
|
299
|
+
cai_stream = managed_arr.__cuda_array_interface__["stream"]
|
303
300
|
stream_value = self.get_stream_value(s)
|
304
301
|
self.assertEqual(stream_value, cai_stream)
|
305
302
|
|
@@ -327,8 +324,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
327
324
|
# Create a foreign array with no stream
|
328
325
|
f_arr = ForeignArray(cuda.device_array(10))
|
329
326
|
|
330
|
-
with patch.object(
|
331
|
-
|
327
|
+
with patch.object(
|
328
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
329
|
+
) as mock_sync:
|
332
330
|
cuda.as_cuda_array(f_arr)
|
333
331
|
|
334
332
|
# Ensure the synchronize method of a stream was not called
|
@@ -339,8 +337,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
339
337
|
s = cuda.stream()
|
340
338
|
f_arr = ForeignArray(cuda.device_array(10, stream=s))
|
341
339
|
|
342
|
-
with patch.object(
|
343
|
-
|
340
|
+
with patch.object(
|
341
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
342
|
+
) as mock_sync:
|
344
343
|
cuda.as_cuda_array(f_arr)
|
345
344
|
|
346
345
|
# Ensure the synchronize method of a stream was called
|
@@ -354,9 +353,10 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
354
353
|
# Set sync to false before testing. The test suite should generally be
|
355
354
|
# run with sync enabled, but stash the old value just in case it is
|
356
355
|
# not.
|
357
|
-
with override_config(
|
358
|
-
with patch.object(
|
359
|
-
|
356
|
+
with override_config("CUDA_ARRAY_INTERFACE_SYNC", False):
|
357
|
+
with patch.object(
|
358
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
359
|
+
) as mock_sync:
|
360
360
|
cuda.as_cuda_array(f_arr)
|
361
361
|
|
362
362
|
# Ensure the synchronize method of a stream was not called
|
@@ -370,8 +370,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
370
370
|
def f(x):
|
371
371
|
pass
|
372
372
|
|
373
|
-
with patch.object(
|
374
|
-
|
373
|
+
with patch.object(
|
374
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
375
|
+
) as mock_sync:
|
375
376
|
f[1, 1](f_arr)
|
376
377
|
|
377
378
|
# Ensure the synchronize method of a stream was not called
|
@@ -386,8 +387,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
386
387
|
def f(x):
|
387
388
|
pass
|
388
389
|
|
389
|
-
with patch.object(
|
390
|
-
|
390
|
+
with patch.object(
|
391
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
392
|
+
) as mock_sync:
|
391
393
|
f[1, 1](f_arr)
|
392
394
|
|
393
395
|
# Ensure the synchronize method of a stream was called
|
@@ -404,8 +406,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
404
406
|
def f(x, y):
|
405
407
|
pass
|
406
408
|
|
407
|
-
with patch.object(
|
408
|
-
|
409
|
+
with patch.object(
|
410
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
411
|
+
) as mock_sync:
|
409
412
|
f[1, 1](f_arr1, f_arr2)
|
410
413
|
|
411
414
|
# Ensure that synchronize was called twice
|
@@ -418,13 +421,15 @@ class TestCudaArrayInterface(ContextResettingTestCase):
|
|
418
421
|
f_arr1 = ForeignArray(cuda.device_array(10, stream=s1))
|
419
422
|
f_arr2 = ForeignArray(cuda.device_array(10, stream=s2))
|
420
423
|
|
421
|
-
with override_config(
|
424
|
+
with override_config("CUDA_ARRAY_INTERFACE_SYNC", False):
|
425
|
+
|
422
426
|
@cuda.jit
|
423
427
|
def f(x, y):
|
424
428
|
pass
|
425
429
|
|
426
|
-
with patch.object(
|
427
|
-
|
430
|
+
with patch.object(
|
431
|
+
cuda.cudadrv.driver.Stream, "synchronize", return_value=None
|
432
|
+
) as mock_sync:
|
428
433
|
f[1, 1](f_arr1, f_arr2)
|
429
434
|
|
430
435
|
# Ensure that synchronize was not called
|
@@ -80,11 +80,12 @@ class TestCudaJitNoTypes(CUDATestCase):
|
|
80
80
|
def test_jit_debug_simulator(self):
|
81
81
|
# Ensure that the jit decorator accepts the debug kwarg when the
|
82
82
|
# simulator is in use - see Issue #6615.
|
83
|
-
with override_config(
|
83
|
+
with override_config("ENABLE_CUDASIM", 1):
|
84
|
+
|
84
85
|
@cuda.jit(debug=True, opt=False)
|
85
86
|
def f(x):
|
86
87
|
pass
|
87
88
|
|
88
89
|
|
89
|
-
if __name__ ==
|
90
|
+
if __name__ == "__main__":
|
90
91
|
unittest.main()
|
@@ -13,9 +13,9 @@ class TestCudaDateTime(CUDATestCase):
|
|
13
13
|
for i in range(cuda.grid(1), delta.size, cuda.gridsize(1)):
|
14
14
|
delta[i] = end[i] - start[i]
|
15
15
|
|
16
|
-
arr1 = np.arange(
|
16
|
+
arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
|
17
17
|
arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
|
18
|
-
delta = np.zeros_like(arr1, dtype=
|
18
|
+
delta = np.zeros_like(arr1, dtype="timedelta64[D]")
|
19
19
|
|
20
20
|
foo[1, 32](arr1, arr2, delta)
|
21
21
|
|
@@ -27,11 +27,12 @@ class TestCudaDateTime(CUDATestCase):
|
|
27
27
|
for i in range(cuda.grid(1), matches.size, cuda.gridsize(1)):
|
28
28
|
matches[i] = dates[i] == target
|
29
29
|
outdelta[i] = dates[i] - delta
|
30
|
-
|
31
|
-
|
30
|
+
|
31
|
+
arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
|
32
|
+
target = arr1[5] # datetime
|
32
33
|
delta = arr1[6] - arr1[5] # timedelta
|
33
34
|
matches = np.zeros_like(arr1, dtype=np.bool_)
|
34
|
-
outdelta = np.zeros_like(arr1, dtype=
|
35
|
+
outdelta = np.zeros_like(arr1, dtype="datetime64[D]")
|
35
36
|
|
36
37
|
foo[1, 32](arr1, target, delta, matches, outdelta)
|
37
38
|
where = matches.nonzero()
|
@@ -39,56 +40,59 @@ class TestCudaDateTime(CUDATestCase):
|
|
39
40
|
self.assertEqual(list(where), [5])
|
40
41
|
self.assertPreciseEqual(outdelta, arr1 - delta)
|
41
42
|
|
42
|
-
@skip_on_cudasim(
|
43
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
43
44
|
def test_ufunc(self):
|
44
|
-
datetime_t = from_dtype(np.dtype(
|
45
|
+
datetime_t = from_dtype(np.dtype("datetime64[D]"))
|
45
46
|
|
46
|
-
@vectorize([(datetime_t, datetime_t)], target=
|
47
|
+
@vectorize([(datetime_t, datetime_t)], target="cuda")
|
47
48
|
def timediff(start, end):
|
48
49
|
return end - start
|
49
50
|
|
50
|
-
arr1 = np.arange(
|
51
|
+
arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
|
51
52
|
arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
|
52
53
|
|
53
54
|
delta = timediff(arr1, arr2)
|
54
55
|
|
55
56
|
self.assertPreciseEqual(delta, arr2 - arr1)
|
56
57
|
|
57
|
-
@skip_on_cudasim(
|
58
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
58
59
|
def test_gufunc(self):
|
59
|
-
datetime_t = from_dtype(np.dtype(
|
60
|
-
timedelta_t = from_dtype(np.dtype(
|
61
|
-
|
62
|
-
@guvectorize(
|
63
|
-
|
60
|
+
datetime_t = from_dtype(np.dtype("datetime64[D]"))
|
61
|
+
timedelta_t = from_dtype(np.dtype("timedelta64[D]"))
|
62
|
+
|
63
|
+
@guvectorize(
|
64
|
+
[(datetime_t, datetime_t, timedelta_t[:])],
|
65
|
+
"(),()->()",
|
66
|
+
target="cuda",
|
67
|
+
)
|
64
68
|
def timediff(start, end, out):
|
65
69
|
out[0] = end - start
|
66
70
|
|
67
|
-
arr1 = np.arange(
|
71
|
+
arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
|
68
72
|
arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
|
69
73
|
|
70
74
|
delta = timediff(arr1, arr2)
|
71
75
|
|
72
76
|
self.assertPreciseEqual(delta, arr2 - arr1)
|
73
77
|
|
74
|
-
@skip_on_cudasim(
|
78
|
+
@skip_on_cudasim("no .copy_to_host() in the simulator")
|
75
79
|
def test_datetime_view_as_int64(self):
|
76
|
-
arr = np.arange(
|
80
|
+
arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
|
77
81
|
darr = cuda.to_device(arr)
|
78
82
|
viewed = darr.view(np.int64)
|
79
83
|
self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
|
80
84
|
self.assertEqual(viewed.gpu_data, darr.gpu_data)
|
81
85
|
|
82
|
-
@skip_on_cudasim(
|
86
|
+
@skip_on_cudasim("no .copy_to_host() in the simulator")
|
83
87
|
def test_timedelta_view_as_int64(self):
|
84
|
-
arr = np.arange(
|
88
|
+
arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
|
85
89
|
arr = arr - (arr - 1)
|
86
|
-
self.assertEqual(arr.dtype, np.dtype(
|
90
|
+
self.assertEqual(arr.dtype, np.dtype("timedelta64[D]"))
|
87
91
|
darr = cuda.to_device(arr)
|
88
92
|
viewed = darr.view(np.int64)
|
89
93
|
self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
|
90
94
|
self.assertEqual(viewed.gpu_data, darr.gpu_data)
|
91
95
|
|
92
96
|
|
93
|
-
if __name__ ==
|
97
|
+
if __name__ == "__main__":
|
94
98
|
unittest.main()
|
@@ -2,8 +2,11 @@ import numpy as np
|
|
2
2
|
|
3
3
|
from numba.core.utils import PYVERSION
|
4
4
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
5
|
-
from numba.tests.support import (
|
6
|
-
|
5
|
+
from numba.tests.support import (
|
6
|
+
override_config,
|
7
|
+
captured_stderr,
|
8
|
+
captured_stdout,
|
9
|
+
)
|
7
10
|
from numba import cuda, float64
|
8
11
|
import unittest
|
9
12
|
|
@@ -13,9 +16,8 @@ def simple_cuda(A, B):
|
|
13
16
|
B[i] = A[i] + 1.5
|
14
17
|
|
15
18
|
|
16
|
-
@skip_on_cudasim(
|
19
|
+
@skip_on_cudasim("Simulator does not produce debug dumps")
|
17
20
|
class TestDebugOutput(CUDATestCase):
|
18
|
-
|
19
21
|
def compile_simple_cuda(self):
|
20
22
|
with captured_stderr() as err:
|
21
23
|
with captured_stdout() as out:
|
@@ -34,14 +36,14 @@ class TestDebugOutput(CUDATestCase):
|
|
34
36
|
self.assertRaises(AssertionError, *args, **kwargs)
|
35
37
|
|
36
38
|
def check_debug_output(self, out, enabled_dumps):
|
37
|
-
all_dumps = dict.fromkeys(
|
38
|
-
|
39
|
-
|
39
|
+
all_dumps = dict.fromkeys(
|
40
|
+
["bytecode", "cfg", "ir", "llvm", "assembly"], False
|
41
|
+
)
|
40
42
|
for name in enabled_dumps:
|
41
43
|
assert name in all_dumps
|
42
44
|
all_dumps[name] = True
|
43
45
|
for name, enabled in sorted(all_dumps.items()):
|
44
|
-
check_meth = getattr(self,
|
46
|
+
check_meth = getattr(self, "_check_dump_%s" % name)
|
45
47
|
if enabled:
|
46
48
|
check_meth(out)
|
47
49
|
else:
|
@@ -50,50 +52,50 @@ class TestDebugOutput(CUDATestCase):
|
|
50
52
|
def _check_dump_bytecode(self, out):
|
51
53
|
if PYVERSION > (3, 10):
|
52
54
|
# binop with arg=0 is binary add, see CPython dis.py and opcode.py
|
53
|
-
self.assertIn(
|
55
|
+
self.assertIn("BINARY_OP(arg=0", out)
|
54
56
|
else:
|
55
|
-
self.assertIn(
|
57
|
+
self.assertIn("BINARY_ADD", out)
|
56
58
|
|
57
59
|
def _check_dump_cfg(self, out):
|
58
|
-
self.assertIn(
|
60
|
+
self.assertIn("CFG dominators", out)
|
59
61
|
|
60
62
|
def _check_dump_ir(self, out):
|
61
|
-
self.assertIn(
|
62
|
-
self.assertIn(
|
63
|
+
self.assertIn("--IR DUMP: simple_cuda--", out)
|
64
|
+
self.assertIn("const(float, 1.5)", out)
|
63
65
|
|
64
66
|
def _check_dump_llvm(self, out):
|
65
|
-
self.assertIn(
|
67
|
+
self.assertIn("--LLVM DUMP", out)
|
66
68
|
self.assertIn('!"kernel", i32 1', out)
|
67
69
|
|
68
70
|
def _check_dump_assembly(self, out):
|
69
|
-
self.assertIn(
|
70
|
-
self.assertIn(
|
71
|
+
self.assertIn("--ASSEMBLY simple_cuda", out)
|
72
|
+
self.assertIn("Generated by NVIDIA NVVM Compiler", out)
|
71
73
|
|
72
74
|
def test_dump_bytecode(self):
|
73
|
-
with override_config(
|
75
|
+
with override_config("DUMP_BYTECODE", True):
|
74
76
|
out = self.compile_simple_cuda()
|
75
|
-
self.check_debug_output(out, [
|
77
|
+
self.check_debug_output(out, ["bytecode"])
|
76
78
|
|
77
79
|
def test_dump_ir(self):
|
78
|
-
with override_config(
|
80
|
+
with override_config("DUMP_IR", True):
|
79
81
|
out = self.compile_simple_cuda()
|
80
|
-
self.check_debug_output(out, [
|
82
|
+
self.check_debug_output(out, ["ir"])
|
81
83
|
|
82
84
|
def test_dump_cfg(self):
|
83
|
-
with override_config(
|
85
|
+
with override_config("DUMP_CFG", True):
|
84
86
|
out = self.compile_simple_cuda()
|
85
|
-
self.check_debug_output(out, [
|
87
|
+
self.check_debug_output(out, ["cfg"])
|
86
88
|
|
87
89
|
def test_dump_llvm(self):
|
88
|
-
with override_config(
|
90
|
+
with override_config("DUMP_LLVM", True):
|
89
91
|
out = self.compile_simple_cuda()
|
90
|
-
self.check_debug_output(out, [
|
92
|
+
self.check_debug_output(out, ["llvm"])
|
91
93
|
|
92
94
|
def test_dump_assembly(self):
|
93
|
-
with override_config(
|
95
|
+
with override_config("DUMP_ASSEMBLY", True):
|
94
96
|
out = self.compile_simple_cuda()
|
95
|
-
self.check_debug_output(out, [
|
97
|
+
self.check_debug_output(out, ["assembly"])
|
96
98
|
|
97
99
|
|
98
|
-
if __name__ ==
|
100
|
+
if __name__ == "__main__":
|
99
101
|
unittest.main()
|