numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
4
|
+
|
5
|
+
|
6
|
+
class TestArrayAttr(CUDATestCase):
    """Contiguity, ravel and reshape checks for CUDA device arrays.

    Every test uploads a NumPy array with ``cuda.to_device`` and compares
    what the device array reports (or copies back) with the host array.
    """

    def _check_contiguity_flags(self, count, shape):
        """Upload C- and F-ordered arrays of ``shape`` and check both flags."""
        c_host = np.arange(count).reshape(*shape)
        f_host = np.asfortranarray(c_host)

        c_dev = cuda.to_device(c_host)
        f_dev = cuda.to_device(f_host)
        self.assertTrue(c_dev.is_c_contiguous())
        self.assertFalse(f_dev.is_c_contiguous())
        self.assertFalse(c_dev.is_f_contiguous())
        self.assertTrue(f_dev.is_f_contiguous())

    def _check_ravel(self, dev, want, **ravel_kwargs):
        """Ravel ``dev`` on the device and compare the host copy to ``want``."""
        dev_flat = dev.ravel(**ravel_kwargs)
        got = dev_flat.copy_to_host()
        self.assertIsNot(dev, dev_flat)  # ravel returns new array
        self.assertEqual(got.ndim, 1)
        self.assertPreciseEqual(want, got)

    def _check_strided_view_ravel_fails(self, parent, view):
        """Check ``view`` shares ``parent``'s data pointer and cannot ravel."""
        parent_ptr = parent.__cuda_array_interface__['data'][0]
        view_ptr = view.__cuda_array_interface__['data'][0]
        # Same pointer: the slice did not copy the data
        self.assertEqual(parent_ptr, view_ptr)
        # Fail on ravel on non-contiguous array
        with self.assertRaises(NotImplementedError):
            view.ravel()

    def test_contigous_2d(self):
        self._check_contiguity_flags(10, (2, 5))

    def test_contigous_3d(self):
        self._check_contiguity_flags(20, (2, 5, 2))

    def test_contigous_4d(self):
        self._check_contiguity_flags(60, (2, 5, 2, 3))

    def test_ravel_1d(self):
        host = np.arange(60)
        dev = cuda.to_device(host)
        for order in 'CFA':
            want = host.ravel(order=order)
            self._check_ravel(dev, want, order=order)

    @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
    def test_ravel_stride_1d(self):
        dev = cuda.to_device(np.arange(60))
        # No-copy stride device array
        self._check_strided_view_ravel_fails(dev, dev[::2])

    def test_ravel_c(self):
        host = np.arange(60).reshape(2, 5, 2, 3)

        # ravel() without an order argument is compared with C order
        want = host.ravel(order='C')
        self._check_ravel(cuda.to_device(host), want)

        # explicit order kwarg
        for order in 'CA':
            want = host.ravel(order=order)
            self._check_ravel(cuda.to_device(host), want, order=order)

    @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
    def test_ravel_stride_c(self):
        host = np.arange(60).reshape(2, 5, 2, 3)

        dev = cuda.to_device(host)
        self._check_strided_view_ravel_fails(dev, dev[::2, ::2, ::2, ::2])

    def test_ravel_f(self):
        host = np.asfortranarray(np.arange(60).reshape(2, 5, 2, 3))
        for order in 'FA':
            want = host.ravel(order=order)
            self._check_ravel(cuda.to_device(host), want, order=order)

    @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
    def test_ravel_stride_f(self):
        host = np.asfortranarray(np.arange(60).reshape(2, 5, 2, 3))
        dev = cuda.to_device(host)
        self._check_strided_view_ravel_fails(dev, dev[::2, ::2, ::2, ::2])

    def test_reshape_c(self):
        host = np.arange(10)
        want = host.reshape(2, 5)
        reshaped_dev = cuda.to_device(host).reshape(2, 5)
        self.assertPreciseEqual(want, reshaped_dev.copy_to_host())

    def test_reshape_f(self):
        host = np.arange(10)
        want = host.reshape(2, 5, order='F')
        reshaped_dev = cuda.to_device(host).reshape(2, 5, order='F')
        self.assertPreciseEqual(want, reshaped_dev.copy_to_host())
|
142
|
+
|
143
|
+
|
144
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,145 @@
|
|
1
|
+
import numbers
|
2
|
+
from ctypes import byref
|
3
|
+
import weakref
|
4
|
+
|
5
|
+
from numba import cuda
|
6
|
+
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
7
|
+
from numba.cuda.cudadrv import driver
|
8
|
+
|
9
|
+
|
10
|
+
class TestContextStack(CUDATestCase):
    """Checks on the ``cuda.gpus`` device list, starting from a closed state."""

    def setUp(self):
        super().setUp()
        # Reset before testing
        cuda.close()

    def test_gpus_current(self):
        """No GPU is current until one is entered as a context manager."""
        self.assertIsNone(cuda.gpus.current)
        with cuda.gpus[0]:
            active_id = int(cuda.gpus.current.id)
            self.assertEqual(active_id, 0)

    def test_gpus_len(self):
        """``len(cuda.gpus)`` reports at least one device."""
        gpu_count = len(cuda.gpus)
        self.assertGreater(gpu_count, 0)

    def test_gpus_iter(self):
        """Iterating ``cuda.gpus`` yields at least one device."""
        devices = [*cuda.gpus]
        self.assertGreater(len(devices), 0)
|
27
|
+
|
28
|
+
|
29
|
+
class TestContextAPI(CUDATestCase):
    """Tests for the context memory query and for switching between GPUs."""

    def tearDown(self):
        super().tearDown()
        cuda.close()

    def test_context_memory(self):
        """``get_memory_info()`` exposes free/total by name and by position."""
        context = cuda.current_context()
        try:
            mem = context.get_memory_info()
        except NotImplementedError:
            self.skipTest('EMM Plugin does not implement get_memory_info()')

        # Field i of the result must be numeric and equal to mem[i]
        for position, field in enumerate(('free', 'total')):
            value = getattr(mem, field)
            self.assertIsInstance(value, numbers.Number)
            self.assertEqual(value, mem[position])

        self.assertLessEqual(mem.free, mem.total)

    @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus")
    @skip_on_cudasim('CUDA HW required')
    def test_forbidden_context_switch(self):
        """Entering another GPU under ``cuda.require_context`` must fail."""
        # Cannot switch context inside a `cuda.require_context`
        @cuda.require_context
        def enter_second_gpu():
            with cuda.gpus[1]:
                pass

        with cuda.gpus[0]:
            with self.assertRaises(RuntimeError) as raises:
                enter_second_gpu()

            message = str(raises.exception)
            self.assertIn("Cannot switch CUDA-context.", message)

    @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus")
    def test_accepted_context_switch(self):
        """Without ``require_context`` a nested GPU switch is allowed."""
        def current_id_on_second_gpu():
            with cuda.gpus[1]:
                return cuda.current_context().device.id

        with cuda.gpus[0]:
            switched_id = current_id_on_second_gpu()
        self.assertEqual(int(switched_id), 1)
|
73
|
+
|
74
|
+
|
75
|
+
@skip_on_cudasim('CUDA HW required')
class Test3rdPartyContext(CUDATestCase):
    """Tests for CUDA contexts created through the driver API directly.

    The contexts are created with raw driver calls rather than through
    numba, to emulate a third-party library having set them up.
    """

    def tearDown(self):
        super().tearDown()
        cuda.close()

    def test_attached_primary(self, extra_work=lambda: None):
        """Push a directly retained primary context and check numba uses it.

        ``extra_work`` is called while that context is pushed; other tests
        use it to run more checks in the same setup.
        """
        # Emulate primary context creation by 3rd party
        the_driver = driver.driver
        if driver.USE_NV_BINDING:
            dev = driver.binding.CUdevice(0)
            hctx = the_driver.cuDevicePrimaryCtxRetain(dev)
        else:
            dev = 0
            hctx = driver.drvapi.cu_context()
            the_driver.cuDevicePrimaryCtxRetain(byref(hctx), dev)
        try:
            # Build and push the context *before* the inner try: if either
            # step raises, there is nothing to pop and the original error
            # must propagate (popping an unbound `ctx` would mask it with an
            # UnboundLocalError). The outer finally still releases the
            # retained primary context.
            ctx = driver.Context(weakref.proxy(self), hctx)
            ctx.push()
            try:
                # Check that the context from numba matches the created
                # primary context.
                my_ctx = cuda.current_context()
                if driver.USE_NV_BINDING:
                    self.assertEqual(int(my_ctx.handle), int(ctx.handle))
                else:
                    self.assertEqual(my_ctx.handle.value, ctx.handle.value)

                extra_work()
            finally:
                ctx.pop()
        finally:
            the_driver.cuDevicePrimaryCtxRelease(dev)

    def test_attached_non_primary(self):
        """``cuda.current_context()`` must reject a non-primary context."""
        # Emulate non-primary context creation by 3rd party
        the_driver = driver.driver
        if driver.USE_NV_BINDING:
            flags = 0
            dev = driver.binding.CUdevice(0)
            hctx = the_driver.cuCtxCreate(flags, dev)
        else:
            hctx = driver.drvapi.cu_context()
            the_driver.cuCtxCreate(byref(hctx), 0, 0)
        try:
            cuda.current_context()
        except RuntimeError as e:
            # Expecting an error about non-primary CUDA context
            self.assertIn("Numba cannot operate on non-primary CUDA context ",
                          str(e))
        else:
            self.fail("No RuntimeError raised")
        finally:
            # Destroy the context created above whether or not the check passed
            the_driver.cuCtxDestroy(hctx)

    def test_cudajit_in_attached_primary_context(self):
        """Compile and launch a kernel while the attached context is pushed."""
        def do():
            from numba import cuda

            @cuda.jit
            def foo(a):
                for i in range(a.size):
                    a[i] = i

            a = cuda.device_array(10)
            foo[1, 1](a)
            self.assertEqual(list(a.copy_to_host()), list(range(10)))

        self.test_attached_primary(do)
|
142
|
+
|
143
|
+
|
144
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,375 @@
|
|
1
|
+
from itertools import product
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
|
5
|
+
from numba import cuda
|
6
|
+
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
7
|
+
from unittest.mock import patch
|
8
|
+
|
9
|
+
|
10
|
+
class CudaArrayIndexing(CUDATestCase):
    """Scalar indexing of device arrays, compared against the host array.

    Each test reads every valid index (negative ones included) and then
    checks that an index one step outside either end of each axis raises
    ``IndexError``.
    """

    def test_index_1d(self):
        host = np.arange(10)
        dev = cuda.to_device(host)
        n, = host.shape
        for i in range(-n, n):
            self.assertEqual(host[i], dev[i])
        for bad in (-n - 1, n):
            with self.assertRaises(IndexError):
                dev[bad]

    def test_index_2d(self):
        host = np.arange(3 * 4).reshape(3, 4)
        dev = cuda.to_device(host)
        rows, cols = host.shape
        for i, j in product(range(-rows, rows), range(-cols, cols)):
            self.assertEqual(host[i, j], dev[i, j])
        out_of_range = (
            (-rows - 1, 0),
            (rows, 0),
            (0, -cols - 1),
            (0, cols),
        )
        for bad in out_of_range:
            with self.assertRaises(IndexError):
                dev[bad]

    def test_index_3d(self):
        host = np.arange(3 * 4 * 5).reshape(3, 4, 5)
        dev = cuda.to_device(host)
        nx, ny, nz = host.shape
        axes = (range(-nx, nx), range(-ny, ny), range(-nz, nz))
        for i, j, k in product(*axes):
            self.assertEqual(host[i, j, k], dev[i, j, k])
        out_of_range = (
            (-nx - 1, 0, 0),
            (nx, 0, 0),
            (0, -ny - 1, 0),
            (0, ny, 0),
            (0, 0, -nz - 1),
            (0, 0, nz),
        )
        for bad in out_of_range:
            with self.assertRaises(IndexError):
                dev[bad]
|
58
|
+
|
59
|
+
|
60
|
+
class CudaArrayStridedSlice(CUDATestCase):
    """Step-2 slices of device arrays, compared against the host array.

    Every start offset along every axis is tried; the device slice is
    copied back to the host before comparing.
    """

    def test_strided_index_1d(self):
        host = np.arange(10)
        dev = cuda.to_device(host)
        for start in range(host.size):
            got = dev[start::2].copy_to_host()
            np.testing.assert_equal(host[start::2], got)

    def test_strided_index_2d(self):
        host = np.arange(6 * 7).reshape(6, 7)
        dev = cuda.to_device(host)

        rows, cols = host.shape
        for i, j in product(range(rows), range(cols)):
            got = dev[i::2, j::2].copy_to_host()
            np.testing.assert_equal(host[i::2, j::2], got)

    def test_strided_index_3d(self):
        host = np.arange(6 * 7 * 8).reshape(6, 7, 8)
        dev = cuda.to_device(host)

        starts = product(*(range(extent) for extent in host.shape))
        for i, j, k in starts:
            got = dev[i::2, j::2, k::2].copy_to_host()
            np.testing.assert_equal(host[i::2, j::2, k::2], got)
|
87
|
+
|
88
|
+
|
89
|
+
class CudaArraySlicing(CUDATestCase):
    """Slicing of device arrays, compared against the same slice on the host.

    Device slices are copied back with ``copy_to_host()`` before comparing;
    some tests also compare shape and strides of the device slice directly.
    """

    def test_prefix_1d(self):
        """``darr[i:]`` matches ``arr[i:]`` for every start index."""
        arr = np.arange(5)
        darr = cuda.to_device(arr)
        for i in range(arr.size):
            expect = arr[i:]
            got = darr[i:].copy_to_host()
            self.assertTrue(np.all(expect == got))

    def test_prefix_2d(self):
        """``darr[i:, j:]`` matches the host in shape, strides and values."""
        arr = np.arange(3 ** 2).reshape(3, 3)
        darr = cuda.to_device(arr)
        for i in range(arr.shape[0]):
            for j in range(arr.shape[1]):
                expect = arr[i:, j:]
                sliced = darr[i:, j:]
                self.assertEqual(expect.shape, sliced.shape)
                self.assertEqual(expect.strides, sliced.strides)
                got = sliced.copy_to_host()
                self.assertTrue(np.all(expect == got))

    def test_select_3d_first_two_dim(self):
        """Integer-indexing the leading one or two axes of a 3D array."""
        arr = np.arange(3 * 4 * 5).reshape(3, 4, 5)
        darr = cuda.to_device(arr)
        # Select first dimension
        for i in range(arr.shape[0]):
            expect = arr[i]
            sliced = darr[i]
            self.assertEqual(expect.shape, sliced.shape)
            self.assertEqual(expect.strides, sliced.strides)
            got = sliced.copy_to_host()
            self.assertTrue(np.all(expect == got))
        # Select second dimension
        for i in range(arr.shape[0]):
            for j in range(arr.shape[1]):
                expect = arr[i, j]
                sliced = darr[i, j]
                self.assertEqual(expect.shape, sliced.shape)
                self.assertEqual(expect.strides, sliced.strides)
                got = sliced.copy_to_host()
                self.assertTrue(np.all(expect == got))

    def test_select_f(self):
        """1D selections along each axis of an array reshaped with order='F'."""
        a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order='F')
        da = cuda.to_device(a)

        for i in range(a.shape[0]):
            for j in range(a.shape[1]):
                self.assertTrue(np.array_equal(da[i, j, :].copy_to_host(),
                                               a[i, j, :]))
            for j in range(a.shape[2]):
                self.assertTrue(np.array_equal(da[i, :, j].copy_to_host(),
                                               a[i, :, j]))
        for i in range(a.shape[1]):
            for j in range(a.shape[2]):
                self.assertTrue(np.array_equal(da[:, i, j].copy_to_host(),
                                               a[:, i, j]))

    def test_select_c(self):
        """1D selections along each axis of an array reshaped with order='C'."""
        a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order='C')
        da = cuda.to_device(a)

        for i in range(a.shape[0]):
            for j in range(a.shape[1]):
                self.assertTrue(np.array_equal(da[i, j, :].copy_to_host(),
                                               a[i, j, :]))
            for j in range(a.shape[2]):
                self.assertTrue(np.array_equal(da[i, :, j].copy_to_host(),
                                               a[i, :, j]))
        for i in range(a.shape[1]):
            for j in range(a.shape[2]):
                self.assertTrue(np.array_equal(da[:, i, j].copy_to_host(),
                                               a[:, i, j]))

    def test_prefix_select(self):
        """A slice on one axis combined with an integer index on the other."""
        arr = np.arange(5 * 7).reshape(5, 7, order='F')

        darr = cuda.to_device(arr)
        self.assertTrue(np.all(darr[:1, 1].copy_to_host() == arr[:1, 1]))

    def test_negative_slicing_1d(self):
        """All start/stop pairs in [-10, 10) on a 10-element array."""
        arr = np.arange(10)
        darr = cuda.to_device(arr)
        for i, j in product(range(-10, 10), repeat=2):
            np.testing.assert_array_equal(arr[i:j],
                                          darr[i:j].copy_to_host())

    def test_negative_slicing_2d(self):
        """All start/stop combinations in [-4, 4) on both axes of a 3x4 array."""
        arr = np.arange(12).reshape(3, 4)
        darr = cuda.to_device(arr)
        for x, y, w, s in product(range(-4, 4), repeat=4):
            np.testing.assert_array_equal(arr[x:y, w:s],
                                          darr[x:y, w:s].copy_to_host())

    def test_empty_slice_1d(self):
        """Empty slices, and slices of empty slices, of a 1D array."""
        arr = np.arange(5)
        darr = cuda.to_device(arr)
        for i in range(darr.shape[0]):
            np.testing.assert_array_equal(darr[i:i].copy_to_host(), arr[i:i])
        # empty slice of empty slice
        # Check the element count explicitly: the truth value of an empty
        # ndarray is deprecated in NumPy and raises in recent releases.
        self.assertEqual(darr[:0][:0].copy_to_host().size, 0)
        # out-of-bound slice just produces empty slices
        np.testing.assert_array_equal(darr[:0][:1].copy_to_host(),
                                      arr[:0][:1])
        np.testing.assert_array_equal(darr[:0][-1:].copy_to_host(),
                                      arr[:0][-1:])

    def test_empty_slice_2d(self):
        """Empty slices, and slices of empty slices, of a 2D array."""
        arr = np.arange(5 * 7).reshape(5, 7)
        darr = cuda.to_device(arr)
        np.testing.assert_array_equal(darr[:0].copy_to_host(), arr[:0])
        np.testing.assert_array_equal(darr[3, :0].copy_to_host(), arr[3, :0])
        # empty slice of empty slice
        # Check the element count explicitly: the truth value of an empty
        # ndarray is deprecated in NumPy and raises in recent releases.
        self.assertEqual(darr[:0][:0].copy_to_host().size, 0)
        # out-of-bound slice just produces empty slices
        np.testing.assert_array_equal(darr[:0][:1].copy_to_host(), arr[:0][:1])
        np.testing.assert_array_equal(darr[:0][-1:].copy_to_host(),
                                      arr[:0][-1:])
|
207
|
+
|
208
|
+
|
209
|
+
class CudaArraySetting(CUDATestCase):
    """
    Tests for assigning into device arrays.

    Slicing is mostly covered by the cases above; the tests here
    concentrate on the assignment path. Most of them apply the same
    assignment to a host array and to its device copy and compare the two.
    """

    def test_scalar(self):
        """Assign a scalar to a single element of a 2-D array."""
        host = np.arange(5 * 7).reshape(5, 7)
        dev = cuda.to_device(host)
        host[2, 2] = 500
        dev[2, 2] = 500
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_rank(self):
        """Assign a scalar to index 2 along the first axis."""
        host = np.arange(5 * 7).reshape(5, 7)
        dev = cuda.to_device(host)
        host[2] = 500
        dev[2] = 500
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_broadcast(self):
        """Assign a scalar to index 2 along the second axis."""
        host = np.arange(5 * 7).reshape(5, 7)
        dev = cuda.to_device(host)
        host[:, 2] = 500
        dev[:, 2] = 500
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_array_assign_column(self):
        """Assign a length-7 array to index 2 along the first axis."""
        host = np.arange(5 * 7).reshape(5, 7)
        dev = cuda.to_device(host)
        fill = np.full(shape=7, fill_value=400)
        host[2] = fill
        dev[2] = fill
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_array_assign_row(self):
        """Assign a length-5 array to index 2 along the second axis."""
        host = np.arange(5 * 7).reshape(5, 7)
        dev = cuda.to_device(host)
        fill = np.full(shape=5, fill_value=400)
        host[:, 2] = fill
        dev[:, 2] = fill
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_array_assign_subarray(self):
        """Assign a (6, 7) array to index 2 of a (5, 6, 7) array."""
        host = np.arange(5 * 6 * 7).reshape(5, 6, 7)
        dev = cuda.to_device(host)
        fill = np.full(shape=(6, 7), fill_value=400)
        host[2] = fill
        dev[2] = fill
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_array_assign_deep_subarray(self):
        """Assign a (5, 6, 8) array to ``[:, :, 2]`` of a 4-D array."""
        host = np.arange(5 * 6 * 7 * 8).reshape(5, 6, 7, 8)
        dev = cuda.to_device(host)
        fill = np.full(shape=(5, 6, 8), fill_value=400)
        host[:, :, 2] = fill
        dev[:, :, 2] = fill
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_array_assign_all(self):
        """Overwrite the whole array with an array of the same shape."""
        host = np.arange(5 * 7).reshape(5, 7)
        dev = cuda.to_device(host)
        fill = np.full(shape=(5, 7), fill_value=400)
        host[:] = fill
        dev[:] = fill
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_strides(self):
        """Assign a scalar through a step-2 slice."""
        host = np.ones(20)
        dev = cuda.to_device(host)
        host[::2] = 500
        dev[::2] = 500
        np.testing.assert_array_equal(dev.copy_to_host(), host)

    def test_incompatible_highdim(self):
        """Assigning a 3-D array into a 1-D array raises ValueError."""
        dev = cuda.to_device(np.arange(5 * 7))

        with self.assertRaises(ValueError) as raised:
            dev[:] = np.ones(shape=(1, 2, 3))

        # The message differs between the device and the simulator.
        accepted_messages = [
            "Can't assign 3-D array to 1-D self",  # device
            "could not broadcast input array from shape (2,3) "
            "into shape (35,)",  # simulator, NP >= 1.20
        ]
        self.assertIn(str(raised.exception), accepted_messages)

    def test_incompatible_shape(self):
        """Assigning a length-2 sequence into a length-5 array raises."""
        dev = cuda.to_device(np.arange(5))

        with self.assertRaises(ValueError) as raised:
            dev[:] = [1, 3]

        # The message differs between the device and the simulator.
        accepted_messages = [
            "Can't copy sequence with size 2 to array axis 0 with "
            "dimension 5",  # device
            "could not broadcast input array from shape (2,) into "
            "shape (5,)",  # simulator, NP >= 1.20
        ]
        self.assertIn(str(raised.exception), accepted_messages)

    @skip_on_cudasim('cudasim does not use streams and operates synchronously')
    def test_sync(self):
        """Assignment synchronizes exactly once when no stream is given."""
        dev = cuda.to_device(np.arange(5))

        sync_patch = patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                                  return_value=None)
        with sync_patch as mock_sync:
            dev[0] = 10

        mock_sync.assert_called_once()

    @skip_on_cudasim('cudasim does not use streams and operates synchronously')
    def test_no_sync_default_stream(self):
        """Assignment does not synchronize when the array has a stream.

        This is checked for a new stream, the default stream, the legacy
        default stream and the per-thread default stream.
        """
        candidates = (
            cuda.stream(),
            cuda.default_stream(),
            cuda.legacy_default_stream(),
            cuda.per_thread_default_stream(),
        )

        for bound_stream in candidates:
            dev = cuda.to_device(np.arange(5), stream=bound_stream)

            sync_patch = patch.object(cuda.cudadrv.driver.Stream,
                                      'synchronize', return_value=None)
            with sync_patch as mock_sync:
                dev[0] = 10

            mock_sync.assert_not_called()

    @skip_on_cudasim('cudasim does not use streams and operates synchronously')
    def test_no_sync_supplied_stream(self):
        """``setitem`` does not synchronize when it is passed a stream.

        This is checked for a new stream, the default stream, the legacy
        default stream and the per-thread default stream.
        """
        candidates = (
            cuda.stream(),
            cuda.default_stream(),
            cuda.legacy_default_stream(),
            cuda.per_thread_default_stream(),
        )

        for supplied_stream in candidates:
            dev = cuda.to_device(np.arange(5))

            sync_patch = patch.object(cuda.cudadrv.driver.Stream,
                                      'synchronize', return_value=None)
            with sync_patch as mock_sync:
                dev.setitem(0, 10, stream=supplied_stream)

            mock_sync.assert_not_called()

    @unittest.skip('Requires PR #6367')
    def test_issue_6505(self):
        """Writes through a byte view of a mapped array are visible."""
        # On Windows the writes to the byte view were not visible before the
        # assertion, because the assignment is performed by a kernel launch
        # that returns asynchronously. A sync after the launch should now
        # make the writes always visible.
        mapped = cuda.mapped_array(2, dtype=np.int32)
        mapped[:] = 0

        as_bytes = mapped.view('u1')
        as_bytes[1] = 1
        as_bytes[5] = 1
        self.assertEqual(sum(mapped), 512)
|
372
|
+
|
373
|
+
|
374
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,21 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
|
5
|
+
|
6
|
+
class TestCudaAutoContext(CUDATestCase):
    def test_auto_context(self):
        """Copy an array to the device and back, then compare the result.

        A customer reported that using ``cuda.to_device`` did not create a
        CUDA context; this test exercises that scenario.
        """
        source = np.arange(10, dtype=np.float32)
        round_trip = np.empty_like(source)
        on_device = cuda.to_device(source)

        # Copy back into the preallocated host buffer.
        on_device.copy_to_host(round_trip)
        self.assertTrue(np.allclose(source, round_trip))
|
18
|
+
|
19
|
+
|
20
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|