numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,260 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim
|
5
|
+
from numba import config, cuda
|
6
|
+
|
7
|
+
|
8
|
+
# The ``*_array_like`` constructors exercised by the tests in this module.
# ``cuda.mapped_array_like`` is only referenced (and therefore only tested)
# when the CUDA simulator is not enabled.
ARRAY_LIKE_FUNCTIONS = (
    (cuda.device_array_like, cuda.pinned_array_like)
    if config.ENABLE_CUDASIM
    else (cuda.device_array_like, cuda.mapped_array_like,
          cuda.pinned_array_like)
)
|
13
|
+
|
14
|
+
|
15
|
+
class TestCudaArray(CUDATestCase):
    """
    Tests for moving arrays between host and device, and for the
    ``*_array_like`` constructors listed in ``ARRAY_LIKE_FUNCTIONS``.
    """

    def test_gpu_array_zero_length(self):
        """
        A zero-length array keeps its shape and size on the device and after
        being copied back to the host.
        """
        x = np.arange(0)
        dx = cuda.to_device(x)
        hx = dx.copy_to_host()
        for other in (dx, hx):
            self.assertEqual(x.shape, other.shape)
            self.assertEqual(x.size, other.size)

    def test_null_shape(self):
        """
        Arrays created from an empty shape tuple report ``()`` as their shape.
        """
        null_shape = ()
        from_shape = cuda.device_array(())
        from_like = cuda.device_array_like(np.ndarray(()))
        self.assertEqual(from_shape.shape, null_shape)
        self.assertEqual(from_like.shape, null_shape)

    def test_gpu_array_strided(self):
        """
        Launch a kernel on a host array whose data starts part-way into
        another array's buffer.
        """

        @cuda.jit('void(double[:])')
        def kernel(x):
            i = cuda.grid(1)
            # More threads are launched than there are elements
            if i < x.shape[0]:
                x[i] = i

        x = np.arange(10, dtype=np.double)
        # Byte-level view of x's buffer (10 doubles of 8 bytes each)
        y = np.ndarray(shape=10 * 8, buffer=x, dtype=np.byte)
        # Dropping 4 bytes from each end leaves room for 9 doubles that start
        # 4 bytes into the original buffer
        z = np.ndarray(9, buffer=y[4:-4], dtype=np.double)
        kernel[10, 10](z)
        self.assertTrue(np.allclose(z, list(range(9))))

    def test_gpu_array_interleaved(self):
        """
        ``auto_device`` must reject a non-contiguous (every-other-element)
        view with a ``ValueError``.
        """

        # Not launched by this test; only the disabled code at the end of
        # the test refers to it.
        @cuda.jit('void(double[:], double[:])')
        def copykernel(x, y):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i
                y[i] = i

        x = np.arange(10, dtype=np.double)
        y = x[:-1:2]
        # z = x[1::2]
        # n = y.size
        with self.assertRaises(
            ValueError,
            msg="Should raise exception complaining the "
                "contiguous-ness of the array.",
        ):
            cuda.devicearray.auto_device(y)
        # Should we handle this use case?
        # assert z.size == y.size
        # copykernel[1, n](y, x)
        # print(y, z)
        # assert np.all(y == z)
        # assert np.all(y == list(range(n)))

    def test_auto_device_const(self):
        """
        ``auto_device`` accepts a plain Python scalar; copying the result
        back to the host yields the same value.
        """
        d, _ = cuda.devicearray.auto_device(2)
        self.assertTrue(np.all(d.copy_to_host() == np.array(2)))

    def _test_array_like_same(self, like_func, array):
        """
        Tests of *_array_like where shape, strides, dtype, and flags should
        all be equal.

        :param like_func: the ``*_array_like`` function under test.
        :param array: the array passed to ``like_func`` and used as the
                      reference for every comparison.
        """
        array_like = like_func(array)
        self.assertEqual(array.shape, array_like.shape)
        self.assertEqual(array.strides, array_like.strides)
        self.assertEqual(array.dtype, array_like.dtype)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(array.flags[flag], array_like.flags[flag])

    def _check_like_same(self, d_a):
        """
        Run ``_test_array_like_same`` on ``d_a`` once per function in
        ``ARRAY_LIKE_FUNCTIONS``, each in its own subtest.
        """
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_1d(self):
        self._check_like_same(cuda.device_array(10, order='C'))

    def test_array_like_2d(self):
        self._check_like_same(cuda.device_array((10, 12), order='C'))

    def test_array_like_2d_transpose(self):
        # NOTE(review): despite the name, no transpose is applied here, so
        # this exercises the same input as test_array_like_2d. Confirm
        # whether ``d_a.T`` was intended before changing it.
        self._check_like_same(cuda.device_array((10, 12), order='C'))

    def test_array_like_3d(self):
        self._check_like_same(cuda.device_array((10, 12, 14), order='C'))

    def test_array_like_1d_f(self):
        self._check_like_same(cuda.device_array(10, order='F'))

    def test_array_like_2d_f(self):
        self._check_like_same(cuda.device_array((10, 12), order='F'))

    def test_array_like_2d_f_transpose(self):
        # NOTE(review): despite the name, no transpose is applied here, so
        # this exercises the same input as test_array_like_2d_f. Confirm
        # whether ``d_a.T`` was intended before changing it.
        self._check_like_same(cuda.device_array((10, 12), order='F'))

    def test_array_like_3d_f(self):
        self._check_like_same(cuda.device_array((10, 12, 14), order='F'))

    def _test_array_like_view(self, like_func, view, d_view):
        """
        Tests of device_array_like where the original array is a view - the
        strides should not be equal because a contiguous array is expected.

        :param like_func: the ``*_array_like`` function under test.
        :param view: a NumPy view; ``np.zeros_like(view)`` supplies the
                     expected strides and contiguity flags.
        :param d_view: the device-side view passed to ``like_func``; it
                       supplies the expected shape and dtype.
        """
        nb_like = like_func(d_view)
        self.assertEqual(d_view.shape, nb_like.shape)
        self.assertEqual(d_view.dtype, nb_like.dtype)

        # Use NumPy as a reference for the expected strides
        np_like = np.zeros_like(view)
        self.assertEqual(nb_like.strides, np_like.strides)
        for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
            self.assertEqual(nb_like.flags[flag], np_like.flags[flag])

    def _check_like_view(self, view, d_view):
        """
        Run ``_test_array_like_view`` on ``view``/``d_view`` once per
        function in ``ARRAY_LIKE_FUNCTIONS``, each in its own subtest.
        """
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    def test_array_like_1d_view(self):
        shape = 10
        view = np.zeros(shape)[::2]
        d_view = cuda.device_array(shape)[::2]
        self._check_like_view(view, d_view)

    def test_array_like_1d_view_f(self):
        shape = 10
        view = np.zeros(shape, order='F')[::2]
        d_view = cuda.device_array(shape, order='F')[::2]
        self._check_like_view(view, d_view)

    def test_array_like_2d_view(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2]
        d_view = cuda.device_array(shape)[::2, ::2]
        self._check_like_view(view, d_view)

    def test_array_like_2d_view_f(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2]
        d_view = cuda.device_array(shape, order='F')[::2, ::2]
        self._check_like_view(view, d_view)

    @skip_on_cudasim('Numba and NumPy stride semantics differ for transpose')
    def test_array_like_2d_view_transpose_device(self):
        shape = (10, 12)
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # This is a special case (see issue #4974) because creating
                # the transpose creates a new contiguous allocation with
                # different strides. In this case, rather than comparing
                # against NumPy, we can only compare against expected values.
                like = like_func(d_view)
                self.assertEqual(d_view.shape, like.shape)
                self.assertEqual(d_view.dtype, like.dtype)
                # (40, 8) matches row-major strides for a (6, 5) array of
                # 8-byte items - presumably the default dtype; TODO confirm
                self.assertEqual((40, 8), like.strides)
                self.assertTrue(like.flags['C_CONTIGUOUS'])
                self.assertFalse(like.flags['F_CONTIGUOUS'])

    @skip_unless_cudasim('Numba and NumPy stride semantics differ for '
                         'transpose')
    def test_array_like_2d_view_transpose_simulator(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2].T
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # On the simulator, the transpose has different strides to on
                # a CUDA device (See issue #4974). Here we can compare strides
                # against NumPy as a reference.
                np_like = np.zeros_like(view)
                nb_like = like_func(d_view)
                self.assertEqual(d_view.shape, nb_like.shape)
                self.assertEqual(d_view.dtype, nb_like.dtype)
                self.assertEqual(np_like.strides, nb_like.strides)
                for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                    self.assertEqual(np_like.flags[flag],
                                     nb_like.flags[flag])

    def test_array_like_2d_view_f_transpose(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2].T
        d_view = cuda.device_array(shape, order='F')[::2, ::2].T
        self._check_like_view(view, d_view)

    @skip_on_cudasim('Kernel overloads not created in the simulator')
    def test_issue_4628(self):
        # CUDA Device arrays were reported as always being typed with 'A'
        # order so launching the kernel with a host array and then a device
        # array resulted in two overloads being compiled - one for 'C' order
        # from the host array, and one for 'A' order from the device array.
        # With the resolution of this issue, the order of the device array is
        # also 'C', so after the kernel launches there should only be one
        # overload of the function.
        @cuda.jit
        def func(A, out):
            i = cuda.grid(1)
            out[i] = A[i] * 2

        n = 128
        a = np.ones((n,))
        d_a = cuda.to_device(a)
        result = np.zeros((n,))

        # One thread per element; the kernel has no bounds check
        func[1, n](a, result)
        func[1, n](d_a, result)

        self.assertEqual(1, len(func.overloads))
|
257
|
+
|
258
|
+
|
259
|
+
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,201 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from collections import namedtuple
|
3
|
+
|
4
|
+
from numba import cuda
|
5
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
6
|
+
|
7
|
+
|
8
|
+
class TestCudaArrayArg(CUDATestCase):
    """
    Tests for passing arrays, tuples, named tuples and nestings of them as
    kernel arguments.
    """

    def test_array_ary(self):
        """
        A kernel forwards its array argument to a device function, which
        reads one element from it.
        """

        @cuda.jit('double(double[:],int64)', device=True, inline=True)
        def device_function(arr, idx):
            return arr[idx]

        @cuda.jit('void(double[:],double[:])')
        def kernel(src, dst):
            tid = cuda.grid(1)
            dst[tid] = device_function(src, tid)

        source = np.arange(10, dtype=np.double)
        target = np.zeros_like(source)
        # Ten blocks of one thread: one thread per element
        kernel[10, 1](source, target)
        self.assertTrue((source == target).all())

    def test_unituple(self):
        """A tuple whose items are all ints is readable inside a kernel."""

        @cuda.jit
        def f(out, tup):
            out[0] = tup[0]
            out[1] = tup[1]
            out[2] = tup[2]

        values = (1, 2, 3)
        result = np.zeros(len(values), dtype=np.int64)
        f[1, 1](result, values)

        for got, want in zip(result, values):
            self.assertEqual(got, want)

    def test_tuple(self):
        """A tuple mixing ints and floats is readable inside a kernel."""

        @cuda.jit
        def f(ints_out, floats_out, tup):
            ints_out[0] = tup[0]
            ints_out[1] = tup[1]
            ints_out[2] = tup[2]
            floats_out[0] = tup[3]
            floats_out[1] = tup[4]
            floats_out[2] = tup[5]

        values = (1, 2, 3, 4.5, 5.5, 6.5)
        half = len(values) // 2
        int_result = np.zeros(half, dtype=np.int64)
        float_result = np.zeros(half, dtype=np.float64)
        f[1, 1](int_result, float_result, values)

        # The first half of the tuple lands in the int array ...
        for got, want in zip(int_result, values[:half]):
            self.assertEqual(got, want)

        # ... and the second half in the float array
        for got, want in zip(float_result, values[half:]):
            self.assertEqual(got, want)

    def test_namedunituple(self):
        """Fields of a named tuple of ints are readable by name."""

        @cuda.jit
        def f(out, point):
            out[0] = point.x
            out[1] = point.y

        Point = namedtuple('Point', ('x', 'y'))
        pt = Point(1, 2)
        result = np.zeros(len(pt), dtype=np.int64)
        f[1, 1](result, pt)

        self.assertEqual(result[0], pt.x)
        self.assertEqual(result[1], pt.y)

    def test_namedtuple(self):
        """Fields of a named tuple mixing ints and a float are readable."""

        @cuda.jit
        def f(ints_out, floats_out, point):
            ints_out[0] = point.x
            ints_out[1] = point.y
            floats_out[0] = point.r

        Point = namedtuple('Point', ('x', 'y', 'r'))
        pt = Point(1, 2, 2.236)
        int_result = np.zeros(2, dtype=np.int64)
        float_result = np.zeros(1, dtype=np.float64)
        f[1, 1](int_result, float_result, pt)

        self.assertEqual(int_result[0], pt.x)
        self.assertEqual(int_result[1], pt.y)
        self.assertEqual(float_result[0], pt.r)

    def test_empty_tuple(self):
        """An empty tuple has length zero inside a kernel."""

        @cuda.jit
        def f(out, tup):
            out[0] = len(tup)

        empty = ()
        # Start from ones so that a kernel that writes nothing is detected
        result = np.ones(1, dtype=np.int64)
        f[1, 1](result, empty)

        self.assertEqual(result[0], 0)

    def test_tuple_of_empty_tuples(self):
        """A tuple of three empty tuples reports lengths 3 and 0."""

        @cuda.jit
        def f(out, tup):
            out[0] = len(tup)
            out[1] = len(tup[0])

        nested = ((), (), ())
        result = np.ones(2, dtype=np.int64)
        f[1, 1](result, nested)

        for got, want in zip(result, (3, 0)):
            self.assertEqual(got, want)

    def test_tuple_of_tuples(self):
        """Lengths and items of differently sized inner tuples are readable."""

        @cuda.jit
        def f(out, tup):
            out[0] = len(tup)
            out[1] = len(tup[0])
            out[2] = len(tup[1])
            out[3] = len(tup[2])
            out[4] = tup[1][0]
            out[5] = tup[1][1]
            out[6] = tup[2][0]
            out[7] = tup[2][1]
            out[8] = tup[2][2]

        nested = ((), (5, 6), (8, 9, 10))
        result = np.ones(9, dtype=np.int64)
        f[1, 1](result, nested)

        # Outer length, the three inner lengths, then the inner items
        expected = (3, 0, 2, 3, 5, 6, 8, 9, 10)
        for got, want in zip(result, expected):
            self.assertEqual(got, want)

    def test_tuple_of_tuples_and_scalars(self):
        """A tuple holding an inner tuple and a scalar is readable."""

        @cuda.jit
        def f(out, tup):
            out[0] = len(tup)
            out[1] = len(tup[0])
            out[2] = tup[0][0]
            out[3] = tup[0][1]
            out[4] = tup[0][2]
            out[5] = tup[1]

        nested = ((6, 5, 4), 7)
        # Nine slots are allocated, but the kernel writes (and the test
        # checks) only the first six
        result = np.ones(9, dtype=np.int64)
        f[1, 1](result, nested)

        expected = (2, 3, 6, 5, 4, 7)
        for got, want in zip(result, expected):
            self.assertEqual(got, want)

    def test_tuple_of_arrays(self):
        """Arrays inside a tuple argument can be read and written."""

        @cuda.jit
        def f(arrays):
            tid = cuda.grid(1)
            if tid < len(arrays[0]):
                arrays[0][tid] = arrays[1][tid] + arrays[2][tid]

        N = 10
        total = np.zeros(N)
        ones = np.ones_like(total)
        threes = ones * 3
        f[1, N]((total, ones, threes))

        # The write to the first array must be visible on the host
        np.testing.assert_equal(total, ones + threes)

    def test_tuple_of_array_scalar_tuple(self):
        """A tuple holding an array, a scalar and an inner tuple is readable."""

        @cuda.jit
        def f(out, tup):
            out[0] = tup[0][0]
            out[1] = tup[0][1]
            out[2] = tup[1]
            out[3] = tup[2][0]
            out[4] = tup[2][1]

        base = np.arange(2, dtype=np.int64)
        mixed = (2 * base, 10, (4, 3))
        result = np.zeros(5, dtype=np.int64)
        f[1, 1](result, mixed)

        # 2 * [0, 1] -> 0, 2; then the scalar; then the inner tuple
        expected = (0, 2, 10, 4, 3)
        for got, want in zip(result, expected):
            self.assertEqual(got, want)
|
198
|
+
|
199
|
+
|
200
|
+
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,35 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda
|
3
|
+
from numba.cuda.testing import CUDATestCase
|
4
|
+
import unittest
|
5
|
+
|
6
|
+
|
7
|
+
def reinterpret_array_type(byte_arr, start, stop, output):
    """
    Store in ``output[0]`` the first int32 obtained by viewing
    ``byte_arr[start:stop]`` as ``np.int32``.

    Tested with just one thread.
    """
    window = byte_arr[start:stop]
    as_int32 = window.view(np.int32)
    output[0] = as_int32[0]
|
11
|
+
|
12
|
+
|
13
|
+
class TestCudaArrayMethods(CUDATestCase):
    """Tests for array methods used inside CUDA kernels."""

    def test_reinterpret_array_type(self):
        """
        Reinterpret byte array as int32 in the GPU.
        """
        kernel = cuda.jit(reinterpret_array_type)

        byte_arr = np.arange(256, dtype=np.uint8)
        itemsize = np.dtype(np.int32).itemsize
        # Host-side reference: the same bytes viewed as consecutive int32s
        expected_words = byte_arr.view(np.int32)

        for index, expect in enumerate(expected_words):
            start = index * itemsize
            stop = start + itemsize

            # Fresh output buffer for every launch
            output = np.zeros(1, dtype=np.int32)
            kernel[1, 1](byte_arr, start, stop, output)

            self.assertEqual(expect, output[0])
|
32
|
+
|
33
|
+
|
34
|
+
# Allow this test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()
|