numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,547 @@
|
|
1
|
+
import itertools
|
2
|
+
import numpy as np
|
3
|
+
from numba.cuda.cudadrv import devicearray
|
4
|
+
from numba import cuda
|
5
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
6
|
+
from numba.cuda.testing import skip_on_cudasim
|
7
|
+
|
8
|
+
|
9
|
+
class TestCudaNDArray(CUDATestCase):
    """
    Tests for CUDA device arrays created through ``cuda.device_array`` and
    ``cuda.to_device``: host/device copies, ``len``, splitting, transposes,
    dtype views, contiguity flags and the layout recorded in the Numba type.

    Array comparisons use ``np.testing.assert_array_equal`` so that a failure
    reports the mismatching elements and a shape mismatch cannot be hidden by
    broadcasting. Data-driven loops use ``subTest`` so that every combination
    is run and reported individually.
    """

    def test_device_array_interface(self):
        # The interface check is applied to an uninitialised device array,
        # to a copy of a 1D host array, and to a copy of a 0-d host array.
        dary = cuda.device_array(shape=100)
        devicearray.verify_cuda_ndarray_interface(dary)

        ary = np.empty(100)
        dary = cuda.to_device(ary)
        devicearray.verify_cuda_ndarray_interface(dary)

        ary = np.asarray(1.234)
        dary = cuda.to_device(ary)
        self.assertEqual(dary.ndim, 0)
        devicearray.verify_cuda_ndarray_interface(dary)

    def test_device_array_from_readonly(self):
        ary = np.arange(100, dtype=np.float32)
        # Make the array readonly
        ary.flags.writeable = False
        self.assertFalse(ary.flags.writeable)
        # Ensure that we can copy the readonly array
        dary = cuda.to_device(ary)
        retr = dary.copy_to_host()
        np.testing.assert_array_equal(retr, ary)

    def test_devicearray_dtype(self):
        dary = cuda.device_array(shape=(100,), dtype="f4")
        self.assertEqual(dary.dtype, np.dtype("f4"))

    def test_devicearray_no_copy(self):
        # Only checks that the call with copy=False completes without error.
        array = np.arange(100, dtype=np.float32)
        cuda.to_device(array, copy=False)

    def test_devicearray_shape(self):
        ary = np.arange(2 * 3 * 4).reshape(2, 3, 4)
        dary = cuda.to_device(ary)
        self.assertEqual(ary.shape, dary.shape)
        self.assertEqual(ary.shape[1:], dary.shape[1:])

    def test_devicearray(self):
        # Round trip: copy to the device, clear the host buffer, then copy
        # back into that same buffer.
        array = np.arange(100, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        array[:] = 0
        gpumem.copy_to_host(array)

        np.testing.assert_array_equal(array, original)

    def test_stream_bind(self):
        stream = cuda.stream()
        with stream.auto_synchronize():
            arr = cuda.device_array(
                (3, 3),
                dtype=np.float64,
                stream=stream)
            self.assertEqual(arr.bind(stream).stream, stream)
            self.assertEqual(arr.stream, stream)

    def test_len_1d(self):
        ary = np.empty((3,))
        dary = cuda.device_array(3)
        self.assertEqual(len(ary), len(dary))

    def test_len_2d(self):
        ary = np.empty((3, 5))
        dary = cuda.device_array((3, 5))
        self.assertEqual(len(ary), len(dary))

    def test_len_3d(self):
        ary = np.empty((3, 5, 7))
        dary = cuda.device_array((3, 5, 7))
        self.assertEqual(len(ary), len(dary))

    def test_devicearray_partition(self):
        N = 100
        array = np.arange(N, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        left, right = gpumem.split(N // 2)

        array[:] = 0

        # Sanity check: the host buffer really was cleared before the
        # two halves are copied back into it.
        np.testing.assert_array_equal(array, np.zeros_like(array))

        right.copy_to_host(array[N // 2:])
        left.copy_to_host(array[:N // 2])

        np.testing.assert_array_equal(array, original)

    def test_devicearray_replace(self):
        # Copying with to=gpumem overwrites the existing device allocation.
        N = 100
        array = np.arange(N, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        cuda.to_device(array * 2, to=gpumem)
        gpumem.copy_to_host(array)
        np.testing.assert_array_equal(array, original * 2)

    @skip_on_cudasim('This works in the simulator')
    def test_devicearray_transpose_wrongdim(self):
        gpumem = cuda.to_device(np.arange(12).reshape(3, 4, 1))

        with self.assertRaises(NotImplementedError) as e:
            np.transpose(gpumem)

        self.assertEqual(
            "transposing a non-2D DeviceNDArray isn't supported",
            str(e.exception))

    def test_devicearray_transpose_identity(self):
        # any-shape identities should work
        original = np.arange(24).reshape(3, 4, 2)
        array = np.transpose(cuda.to_device(original),
                             axes=(0, 1, 2)).copy_to_host()
        np.testing.assert_array_equal(array, original)

    def test_devicearray_transpose_duplicatedaxis(self):
        gpumem = cuda.to_device(np.arange(12).reshape(3, 4))

        with self.assertRaises(ValueError) as e:
            np.transpose(gpumem, axes=(0, 0))

        self.assertIn(
            str(e.exception),
            container=[
                'invalid axes list (0, 0)',  # GPU
                'repeated axis in transpose',  # sim
            ])

    def test_devicearray_transpose_wrongaxis(self):
        gpumem = cuda.to_device(np.arange(12).reshape(3, 4))

        with self.assertRaises(ValueError) as e:
            np.transpose(gpumem, axes=(0, 2))

        self.assertIn(
            str(e.exception),
            container=[
                'invalid axes list (0, 2)',  # GPU
                'invalid axis for this array',
                'axis 2 is out of bounds for array of dimension 2',  # sim
            ])

    def test_devicearray_view_ok(self):
        original = np.arange(12, dtype="i2").reshape(3, 4)
        array = cuda.to_device(original)
        for dtype in ("i4", "u4", "i8", "f8"):
            with self.subTest(dtype=dtype):
                np.testing.assert_array_equal(
                    array.view(dtype).copy_to_host(),
                    original.view(dtype)
                )

    def test_devicearray_view_ok_not_c_contig(self):
        # Viewing as a dtype of the same item size is expected to work even
        # on a strided slice.
        original = np.arange(32, dtype="i2").reshape(4, 8)
        array = cuda.to_device(original)[:, ::2]
        original = original[:, ::2]
        np.testing.assert_array_equal(
            array.view("u2").copy_to_host(),
            original.view("u2")
        )

    def test_devicearray_view_bad_not_c_contig(self):
        original = np.arange(32, dtype="i2").reshape(4, 8)
        array = cuda.to_device(original)[:, ::2]
        with self.assertRaises(ValueError) as e:
            array.view("i4")

        msg = str(e.exception)
        self.assertIn('To change to a dtype of a different size,', msg)

        # Either of two wordings of the contiguity requirement is accepted.
        contiguous_pre_np123 = 'the array must be C-contiguous' in msg
        contiguous_post_np123 = 'the last axis must be contiguous' in msg
        self.assertTrue(contiguous_pre_np123 or contiguous_post_np123,
                        'Expected message to mention contiguity')

    def test_devicearray_view_bad_itemsize(self):
        original = np.arange(12, dtype="i2").reshape(4, 3)
        array = cuda.to_device(original)
        with self.assertRaises(ValueError) as e:
            array.view("i4")
        self.assertEqual(
            "When changing to a larger dtype,"
            " its size must be a divisor of the total size in bytes"
            " of the last axis of the array.",
            str(e.exception))

    def test_devicearray_transpose_ok(self):
        original = np.arange(12).reshape(3, 4)
        array = np.transpose(cuda.to_device(original)).copy_to_host()
        np.testing.assert_array_equal(array, original.T)

    def test_devicearray_transpose_T(self):
        original = np.arange(12).reshape(3, 4)
        array = cuda.to_device(original).T.copy_to_host()
        np.testing.assert_array_equal(array, original.T)

    def test_devicearray_contiguous_slice(self):
        # memcpys are dumb ranges of bytes, so trying to
        # copy to a non-contiguous range shouldn't work!
        a = np.arange(25).reshape(5, 5, order='F')
        s = np.full(fill_value=5, shape=(5,))

        d = cuda.to_device(a)
        a[2] = s

        # d is in F-order (not C-order), so d[2] is not contiguous
        # (40-byte strides). This means we can't memcpy to it!
        with self.assertRaises(ValueError) as e:
            d[2].copy_to_device(s)
        self.assertEqual(
            devicearray.errmsg_contiguous_buffer,
            str(e.exception))

        # if d[2].copy_to_device(s), then this would pass:
        # self.assertTrue((a == d.copy_to_host()).all())

    def _test_devicearray_contiguous_host_copy(self, a_c, a_f):
        """
        Checks host->device memcpys

        ``a_c`` and ``a_f`` are asserted to be C- and F-contiguous
        respectively. Every (initial device contents, host source)
        pairing of the two is copied and the device contents are then
        compared against both arrays.
        """
        self.assertTrue(a_c.flags.c_contiguous)
        self.assertTrue(a_f.flags.f_contiguous)

        for original, copy in [
            (a_f, a_f),
            (a_f, a_c),
            (a_c, a_f),
            (a_c, a_c),
        ]:
            msg = '%s => %s' % (
                'C' if original.flags.c_contiguous else 'F',
                'C' if copy.flags.c_contiguous else 'F',
            )

            # One sub-test per pairing so that a failure in one ordering
            # does not prevent the remaining orderings from being run.
            with self.subTest(copy=msg):
                d = cuda.to_device(original)
                d.copy_to_device(copy)
                np.testing.assert_array_equal(
                    d.copy_to_host(), a_c, err_msg=msg)
                np.testing.assert_array_equal(
                    d.copy_to_host(), a_f, err_msg=msg)

    def test_devicearray_contiguous_copy_host_3d(self):
        a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
        a_f = np.array(a_c, order='F')
        self._test_devicearray_contiguous_host_copy(a_c, a_f)

    def test_devicearray_contiguous_copy_host_1d(self):
        a_c = np.arange(5)
        a_f = np.array(a_c, order='F')
        self._test_devicearray_contiguous_host_copy(a_c, a_f)

    def test_devicearray_contiguous_copy_device(self):
        # Device-to-device copies between arrays whose strides differ are
        # expected to be rejected; copies with matching strides succeed.
        a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
        a_f = np.array(a_c, order='F')
        self.assertTrue(a_c.flags.c_contiguous)
        self.assertTrue(a_f.flags.f_contiguous)

        d = cuda.to_device(a_c)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(a_f))
        self.assertEqual(
            "incompatible strides: {} vs. {}".format(a_c.strides, a_f.strides),
            str(e.exception))

        d.copy_to_device(cuda.to_device(a_c))
        np.testing.assert_array_equal(d.copy_to_host(), a_c)

        d = cuda.to_device(a_f)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(a_c))
        self.assertEqual(
            "incompatible strides: {} vs. {}".format(a_f.strides, a_c.strides),
            str(e.exception))

        d.copy_to_device(cuda.to_device(a_f))
        np.testing.assert_array_equal(d.copy_to_host(), a_f)

    def test_devicearray_broadcast_host_copy(self):
        broadsize = 4
        coreshape = (2, 3)
        coresize = np.prod(coreshape)
        core_c = np.arange(coresize).reshape(coreshape, order='C')
        core_f = np.arange(coresize).reshape(coreshape, order='F')
        for dim in range(len(coreshape)):
            # One sub-test per position of the inserted broadcast axis.
            with self.subTest(dim=dim):
                newindex = (slice(None),) * dim + (np.newaxis,)
                broadshape = coreshape[:dim] + (broadsize,) + coreshape[dim:]
                broad_c = np.broadcast_to(core_c[newindex], broadshape)
                broad_f = np.broadcast_to(core_f[newindex], broadshape)
                dbroad_c = cuda.to_device(broad_c)
                dbroad_f = cuda.to_device(broad_f)
                np.testing.assert_array_equal(
                    dbroad_c.copy_to_host(), broad_c)
                np.testing.assert_array_equal(
                    dbroad_f.copy_to_host(), broad_f)
                # Also test copying across different core orderings
                dbroad_c.copy_to_device(broad_f)
                dbroad_f.copy_to_device(broad_c)
                np.testing.assert_array_equal(
                    dbroad_c.copy_to_host(), broad_f)
                np.testing.assert_array_equal(
                    dbroad_f.copy_to_host(), broad_c)

    def test_devicearray_contiguous_host_strided(self):
        # A strided host array is accepted as the source of a copy into a
        # contiguous device array.
        a_c = np.arange(10)
        d = cuda.to_device(a_c)
        arr = np.arange(20)[::2]
        d.copy_to_device(arr)
        np.testing.assert_array_equal(d.copy_to_host(), arr)

    def test_devicearray_contiguous_device_strided(self):
        d = cuda.to_device(np.arange(20))
        arr = np.arange(20)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(arr)[::2])
        self.assertEqual(
            devicearray.errmsg_contiguous_buffer,
            str(e.exception))

    @skip_on_cudasim('DeviceNDArray class not present in simulator')
    def test_devicearray_relaxed_strides(self):
        # From the reproducer in Issue #6824.

        # Construct a device array that is contiguous even though
        # the strides for the first axis (800) are not equal to
        # the strides * size (10 * 8 = 80) for the previous axis,
        # because the first axis size is 1.
        arr = devicearray.DeviceNDArray((1, 10), (800, 8), np.float64)

        # Ensure we still believe the array to be contiguous because
        # strides checking is relaxed.
        self.assertTrue(arr.flags['C_CONTIGUOUS'])
        self.assertTrue(arr.flags['F_CONTIGUOUS'])

    def test_c_f_contiguity_matches_numpy(self):
        # From the reproducer in Issue #4943.

        shapes = ((1, 4), (4, 1))
        orders = ('C', 'F')

        for shape, order in itertools.product(shapes, orders):
            with self.subTest(shape=shape, order=order):
                arr = np.ndarray(shape, order=order)
                d_arr = cuda.to_device(arr)
                self.assertEqual(arr.flags['C_CONTIGUOUS'],
                                 d_arr.flags['C_CONTIGUOUS'])
                self.assertEqual(arr.flags['F_CONTIGUOUS'],
                                 d_arr.flags['F_CONTIGUOUS'])

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_simple_c(self):
        # C-order 1D array
        a = np.zeros(10, order='C')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_simple_f(self):
        # F-order array that is also C layout.
        a = np.zeros(10, order='F')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_2d_c(self):
        # C-order 2D array
        a = np.zeros((2, 10), order='C')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_2d_f(self):
        # F-order array that can only be F layout
        a = np.zeros((2, 10), order='F')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'F')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_noncontig_slice_c(self):
        # Non-contiguous slice of C-order array
        a = np.zeros((5, 5), order='C')
        d = cuda.to_device(a)[:, 2]
        self.assertEqual(d._numba_type_.layout, 'A')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_noncontig_slice_f(self):
        # Non-contiguous slice of F-order array
        a = np.zeros((5, 5), order='F')
        d = cuda.to_device(a)[2, :]
        self.assertEqual(d._numba_type_.layout, 'A')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_contig_slice_c(self):
        # Contiguous slice of C-order array
        a = np.zeros((5, 5), order='C')
        d = cuda.to_device(a)[2, :]
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_contig_slice_f(self):
        # Contiguous slice of F-order array - is both C- and F-contiguous, so
        # types as 'C' layout
        a = np.zeros((5, 5), order='F')
        d = cuda.to_device(a)[:, 2]
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_broadcasted(self):
        # Broadcasted array, similar to that used for passing scalars to ufuncs
        a = np.broadcast_to(np.array([1]), (10,))
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'A')

    def test_bug6697(self):
        # Converting a device array with np.asarray must keep its dtype.
        ary = np.arange(10, dtype=np.int16)
        dary = cuda.to_device(ary)
        got = np.asarray(dary)
        self.assertEqual(got.dtype, dary.dtype)

    @skip_on_cudasim('DeviceNDArray class not present in simulator')
    def test_issue_8477(self):
        # Ensure that we can copy a zero-length device array to a zero-length
        # host array when the strides of the device and host arrays differ -
        # this should be possible because the strides are irrelevant when the
        # length is zero. For more info see
        # https://github.com/numba/numba/issues/8477.

        # Create a device array with shape (0,) and strides (8,)
        dev_array = devicearray.DeviceNDArray(shape=(0,), strides=(8,),
                                              dtype=np.int8)

        # Create a host array with shape (0,) and strides (0,)
        host_array = np.ndarray(shape=(0,), strides=(0,), dtype=np.int8)

        # Sanity check for this test - ensure our destination has the strides
        # we expect, because strides can be ignored in some cases by the
        # ndarray constructor - checking here ensures that we haven't failed to
        # account for unexpected behaviour across different versions of NumPy
        self.assertEqual(host_array.strides, (0,))

        # Ensure that the copy succeeds in both directions
        dev_array.copy_to_host(host_array)
        dev_array.copy_to_device(host_array)

        # Ensure that a device-to-device copy also succeeds when the strides
        # differ - one way of doing this is to copy the host array across and
        # use that for copies in both directions.
        dev_array_from_host = cuda.to_device(host_array)
        self.assertEqual(dev_array_from_host.shape, (0,))
        self.assertEqual(dev_array_from_host.strides, (0,))

        dev_array.copy_to_device(dev_array_from_host)
        dev_array_from_host.copy_to_device(dev_array)
|
457
|
+
|
458
|
+
|
459
|
+
class TestRecarray(CUDATestCase):
    """
    Passes a NumPy record array to a CUDA kernel and reads its fields by
    attribute access inside the kernel.
    """

    def test_recarray(self):
        # From issue #4111
        record_dtype = [
            ("value1", np.int64),
            ("value2", np.float64),
        ]
        records = np.recarray((16,), dtype=record_dtype)
        records.value1 = np.arange(records.size, dtype=np.int64)
        records.value2 = np.arange(records.size, dtype=np.float64) / 100

        expect1 = records.value1
        expect2 = records.value2

        def copy_fields(rec, dst1, dst2):
            # Each thread copies both fields of one record; threads whose
            # index is past the end of the record array do nothing.
            idx = cuda.grid(1)
            if idx < rec.size:
                dst1[idx] = rec.value1[idx]
                dst2[idx] = rec.value2[idx]

        got1 = np.zeros_like(expect1)
        got2 = np.zeros_like(expect2)

        # Launch a single block with one thread per record.
        kernel = cuda.jit(copy_fields)
        kernel[1, records.size](records, got1, got2)

        np.testing.assert_array_equal(expect1, got1)
        np.testing.assert_array_equal(expect2, got2)
|
484
|
+
|
485
|
+
|
486
|
+
class TestCoreContiguous(CUDATestCase):
    """
    Compares ``devicearray.is_contiguous`` with the ``C_CONTIGUOUS`` flag of
    ``devicearray.array_core`` for device arrays of several shapes and
    orders, and for strided host-side NumPy views.
    """

    def _test_against_array_core(self, view):
        # The contiguity reported for ``view`` must equal the C-contiguity
        # flag of the array returned by ``array_core`` for the same object.
        reported = devicearray.is_contiguous(view)
        core_is_c_contiguous = devicearray.array_core(view).flags['C_CONTIGUOUS']
        self.assertEqual(reported, core_is_c_contiguous)

    def test_device_array_like_1d(self):
        self._test_against_array_core(cuda.device_array(10, order='C'))

    def test_device_array_like_2d(self):
        self._test_against_array_core(cuda.device_array((10, 12), order='C'))

    def test_device_array_like_2d_transpose(self):
        c_ordered = cuda.device_array((10, 12), order='C')
        self._test_against_array_core(c_ordered.T)

    def test_device_array_like_3d(self):
        self._test_against_array_core(
            cuda.device_array((10, 12, 14), order='C'))

    def test_device_array_like_1d_f(self):
        self._test_against_array_core(cuda.device_array(10, order='F'))

    def test_device_array_like_2d_f(self):
        self._test_against_array_core(cuda.device_array((10, 12), order='F'))

    def test_device_array_like_2d_f_transpose(self):
        f_ordered = cuda.device_array((10, 12), order='F')
        self._test_against_array_core(f_ordered.T)

    def test_device_array_like_3d_f(self):
        self._test_against_array_core(
            cuda.device_array((10, 12, 14), order='F'))

    def test_1d_view(self):
        # Every second element of a 1D host array.
        strided = np.zeros(10)[::2]
        self._test_against_array_core(strided)

    def test_1d_view_f(self):
        strided = np.zeros(10, order='F')[::2]
        self._test_against_array_core(strided)

    def test_2d_view(self):
        # Every second row and column of a 2D host array.
        strided = np.zeros((10, 12))[::2, ::2]
        self._test_against_array_core(strided)

    def test_2d_view_f(self):
        strided = np.zeros((10, 12), order='F')[::2, ::2]
        self._test_against_array_core(strided)
|
544
|
+
|
545
|
+
|
546
|
+
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|