numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ from numba.cuda.testing import unittest, ContextResettingTestCase
|
|
7
7
|
from numba.cuda.testing import skip_on_cudasim
|
8
8
|
|
9
9
|
|
10
|
-
@skip_on_cudasim(
|
10
|
+
@skip_on_cudasim("CUDA Memory API unsupported in the simulator")
|
11
11
|
class TestCudaMemory(ContextResettingTestCase):
|
12
12
|
def setUp(self):
|
13
13
|
super().setUp()
|
@@ -24,8 +24,7 @@ class TestCudaMemory(ContextResettingTestCase):
|
|
24
24
|
expected_class = driver.binding.CUdeviceptr
|
25
25
|
else:
|
26
26
|
expected_class = drvapi.cu_device_ptr
|
27
|
-
self.assertTrue(isinstance(obj.device_ctypes_pointer,
|
28
|
-
expected_class))
|
27
|
+
self.assertTrue(isinstance(obj.device_ctypes_pointer, expected_class))
|
29
28
|
|
30
29
|
def test_device_memory(self):
|
31
30
|
devmem = self.context.memalloc(1024)
|
@@ -41,9 +40,9 @@ class TestCudaMemory(ContextResettingTestCase):
|
|
41
40
|
|
42
41
|
def test_pinned_memory(self):
|
43
42
|
ary = np.arange(10)
|
44
|
-
devmem = self.context.mempin(
|
45
|
-
|
46
|
-
|
43
|
+
devmem = self.context.mempin(
|
44
|
+
ary, ary.ctypes.data, ary.size * ary.dtype.itemsize, mapped=True
|
45
|
+
)
|
47
46
|
self._template(devmem)
|
48
47
|
|
49
48
|
def test_managed_memory(self):
|
@@ -69,8 +68,7 @@ class TestCudaMemory(ContextResettingTestCase):
|
|
69
68
|
v2 = v1.view(offset)
|
70
69
|
self.assertEqual(handle_val(v2.owner), handle_val(m))
|
71
70
|
self.assertEqual(handle_val(v2.owner), handle_val(m))
|
72
|
-
self.assertEqual(handle_val(v2) - offset * 2,
|
73
|
-
handle_val(v2.owner))
|
71
|
+
self.assertEqual(handle_val(v2) - offset * 2, handle_val(v2.owner))
|
74
72
|
self.assertEqual(m.refct, 3)
|
75
73
|
del v2
|
76
74
|
self.assertEqual(m.refct, 2)
|
@@ -84,22 +82,24 @@ class TestCudaMemory(ContextResettingTestCase):
|
|
84
82
|
def test_user_extension(self):
|
85
83
|
# User can use MemoryPointer to wrap externally defined pointers.
|
86
84
|
# This test checks if the finalizer is invokded at correct time
|
87
|
-
fake_ptr = ctypes.c_void_p(
|
85
|
+
fake_ptr = ctypes.c_void_p(0xDEADBEEF)
|
88
86
|
dtor_invoked = [0]
|
89
87
|
|
90
88
|
def dtor():
|
91
89
|
dtor_invoked[0] += 1
|
92
90
|
|
93
91
|
# Ensure finalizer is called when pointer is deleted
|
94
|
-
ptr = driver.MemoryPointer(
|
95
|
-
|
92
|
+
ptr = driver.MemoryPointer(
|
93
|
+
context=self.context, pointer=fake_ptr, size=40, finalizer=dtor
|
94
|
+
)
|
96
95
|
self.assertEqual(dtor_invoked[0], 0)
|
97
96
|
del ptr
|
98
97
|
self.assertEqual(dtor_invoked[0], 1)
|
99
98
|
|
100
99
|
# Ensure removing derived pointer doesn't call finalizer
|
101
|
-
ptr = driver.MemoryPointer(
|
102
|
-
|
100
|
+
ptr = driver.MemoryPointer(
|
101
|
+
context=self.context, pointer=fake_ptr, size=40, finalizer=dtor
|
102
|
+
)
|
103
103
|
owned = ptr.own()
|
104
104
|
del owned
|
105
105
|
self.assertEqual(dtor_invoked[0], 1)
|
@@ -128,16 +128,16 @@ class TestCudaMemoryFunctions(ContextResettingTestCase):
|
|
128
128
|
self.assertTrue(np.all(hstary == hstary2))
|
129
129
|
|
130
130
|
def test_memset(self):
|
131
|
-
dtype = np.dtype(
|
131
|
+
dtype = np.dtype("uint32")
|
132
132
|
n = 10
|
133
133
|
sz = dtype.itemsize * 10
|
134
134
|
devary = self.context.memalloc(sz)
|
135
|
-
driver.device_memset(devary,
|
135
|
+
driver.device_memset(devary, 0xAB, sz)
|
136
136
|
|
137
137
|
hstary = np.empty(n, dtype=dtype)
|
138
138
|
driver.device_to_host(hstary, devary, sz)
|
139
139
|
|
140
|
-
hstary2 = np.array([
|
140
|
+
hstary2 = np.array([0xABABABAB] * n, dtype=np.dtype("uint32"))
|
141
141
|
self.assertTrue(np.all(hstary == hstary2))
|
142
142
|
|
143
143
|
def test_d2d(self):
|
@@ -152,7 +152,7 @@ class TestCudaMemoryFunctions(ContextResettingTestCase):
|
|
152
152
|
self.assertTrue(np.all(hst == hst2))
|
153
153
|
|
154
154
|
|
155
|
-
@skip_on_cudasim(
|
155
|
+
@skip_on_cudasim("CUDA Memory API unsupported in the simulator")
|
156
156
|
class TestMVExtent(ContextResettingTestCase):
|
157
157
|
def test_c_contiguous_array(self):
|
158
158
|
ary = np.arange(100)
|
@@ -177,7 +177,7 @@ class TestMVExtent(ContextResettingTestCase):
|
|
177
177
|
|
178
178
|
def test_ctypes_struct(self):
|
179
179
|
class mystruct(ctypes.Structure):
|
180
|
-
_fields_ = [(
|
180
|
+
_fields_ = [("x", ctypes.c_int), ("y", ctypes.c_int)]
|
181
181
|
|
182
182
|
data = mystruct(x=123, y=432)
|
183
183
|
sz = driver.host_memory_size(data)
|
@@ -189,5 +189,5 @@ class TestMVExtent(ContextResettingTestCase):
|
|
189
189
|
self.assertTrue(ctypes.sizeof(data) == sz)
|
190
190
|
|
191
191
|
|
192
|
-
if __name__ ==
|
192
|
+
if __name__ == "__main__":
|
193
193
|
unittest.main()
|
@@ -57,10 +57,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
57
57
|
def test_stream_bind(self):
|
58
58
|
stream = cuda.stream()
|
59
59
|
with stream.auto_synchronize():
|
60
|
-
arr = cuda.device_array(
|
61
|
-
(3, 3),
|
62
|
-
dtype=np.float64,
|
63
|
-
stream=stream)
|
60
|
+
arr = cuda.device_array((3, 3), dtype=np.float64, stream=stream)
|
64
61
|
self.assertEqual(arr.bind(stream).stream, stream)
|
65
62
|
self.assertEqual(arr.stream, stream)
|
66
63
|
|
@@ -90,8 +87,8 @@ class TestCudaNDArray(CUDATestCase):
|
|
90
87
|
|
91
88
|
self.assertTrue(np.all(array == 0))
|
92
89
|
|
93
|
-
right.copy_to_host(array[N // 2:])
|
94
|
-
left.copy_to_host(array[:N // 2])
|
90
|
+
right.copy_to_host(array[N // 2 :])
|
91
|
+
left.copy_to_host(array[: N // 2])
|
95
92
|
|
96
93
|
self.assertTrue(np.all(array == original))
|
97
94
|
|
@@ -104,7 +101,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
104
101
|
gpumem.copy_to_host(array)
|
105
102
|
np.testing.assert_array_equal(array, original * 2)
|
106
103
|
|
107
|
-
@skip_on_cudasim(
|
104
|
+
@skip_on_cudasim("This works in the simulator")
|
108
105
|
def test_devicearray_transpose_wrongdim(self):
|
109
106
|
gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4, 1))
|
110
107
|
|
@@ -113,13 +110,15 @@ class TestCudaNDArray(CUDATestCase):
|
|
113
110
|
|
114
111
|
self.assertEqual(
|
115
112
|
"transposing a non-2D DeviceNDArray isn't supported",
|
116
|
-
str(e.exception)
|
113
|
+
str(e.exception),
|
114
|
+
)
|
117
115
|
|
118
116
|
def test_devicearray_transpose_identity(self):
|
119
117
|
# any-shape identities should work
|
120
118
|
original = np.array(np.arange(24)).reshape(3, 4, 2)
|
121
|
-
array = np.transpose(
|
122
|
-
|
119
|
+
array = np.transpose(
|
120
|
+
cuda.to_device(original), axes=(0, 1, 2)
|
121
|
+
).copy_to_host()
|
123
122
|
self.assertTrue(np.all(array == original))
|
124
123
|
|
125
124
|
def test_devicearray_transpose_duplicatedaxis(self):
|
@@ -131,9 +130,10 @@ class TestCudaNDArray(CUDATestCase):
|
|
131
130
|
self.assertIn(
|
132
131
|
str(e.exception),
|
133
132
|
container=[
|
134
|
-
|
135
|
-
|
136
|
-
]
|
133
|
+
"invalid axes list (0, 0)", # GPU
|
134
|
+
"repeated axis in transpose", # sim
|
135
|
+
],
|
136
|
+
)
|
137
137
|
|
138
138
|
def test_devicearray_transpose_wrongaxis(self):
|
139
139
|
gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4))
|
@@ -144,10 +144,11 @@ class TestCudaNDArray(CUDATestCase):
|
|
144
144
|
self.assertIn(
|
145
145
|
str(e.exception),
|
146
146
|
container=[
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
]
|
147
|
+
"invalid axes list (0, 2)", # GPU
|
148
|
+
"invalid axis for this array",
|
149
|
+
"axis 2 is out of bounds for array of dimension 2", # sim
|
150
|
+
],
|
151
|
+
)
|
151
152
|
|
152
153
|
def test_devicearray_view_ok(self):
|
153
154
|
original = np.array(np.arange(12), dtype="i2").reshape(3, 4)
|
@@ -155,8 +156,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
155
156
|
for dtype in ("i4", "u4", "i8", "f8"):
|
156
157
|
with self.subTest(dtype=dtype):
|
157
158
|
np.testing.assert_array_equal(
|
158
|
-
array.view(dtype).copy_to_host(),
|
159
|
-
original.view(dtype)
|
159
|
+
array.view(dtype).copy_to_host(), original.view(dtype)
|
160
160
|
)
|
161
161
|
|
162
162
|
def test_devicearray_view_ok_not_c_contig(self):
|
@@ -164,8 +164,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
164
164
|
array = cuda.to_device(original)[:, ::2]
|
165
165
|
original = original[:, ::2]
|
166
166
|
np.testing.assert_array_equal(
|
167
|
-
array.view("u2").copy_to_host(),
|
168
|
-
original.view("u2")
|
167
|
+
array.view("u2").copy_to_host(), original.view("u2")
|
169
168
|
)
|
170
169
|
|
171
170
|
def test_devicearray_view_bad_not_c_contig(self):
|
@@ -175,12 +174,14 @@ class TestCudaNDArray(CUDATestCase):
|
|
175
174
|
array.view("i4")
|
176
175
|
|
177
176
|
msg = str(e.exception)
|
178
|
-
self.assertIn(
|
177
|
+
self.assertIn("To change to a dtype of a different size,", msg)
|
179
178
|
|
180
|
-
contiguous_pre_np123 =
|
181
|
-
contiguous_post_np123 =
|
182
|
-
self.assertTrue(
|
183
|
-
|
179
|
+
contiguous_pre_np123 = "the array must be C-contiguous" in msg
|
180
|
+
contiguous_post_np123 = "the last axis must be contiguous" in msg
|
181
|
+
self.assertTrue(
|
182
|
+
contiguous_pre_np123 or contiguous_post_np123,
|
183
|
+
"Expected message to mention contiguity",
|
184
|
+
)
|
184
185
|
|
185
186
|
def test_devicearray_view_bad_itemsize(self):
|
186
187
|
original = np.array(np.arange(12), dtype="i2").reshape(4, 3)
|
@@ -191,7 +192,8 @@ class TestCudaNDArray(CUDATestCase):
|
|
191
192
|
"When changing to a larger dtype,"
|
192
193
|
" its size must be a divisor of the total size in bytes"
|
193
194
|
" of the last axis of the array.",
|
194
|
-
str(e.exception)
|
195
|
+
str(e.exception),
|
196
|
+
)
|
195
197
|
|
196
198
|
def test_devicearray_transpose_ok(self):
|
197
199
|
original = np.array(np.arange(12)).reshape(3, 4)
|
@@ -206,7 +208,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
206
208
|
def test_devicearray_contiguous_slice(self):
|
207
209
|
# memcpys are dumb ranges of bytes, so trying to
|
208
210
|
# copy to a non-contiguous range shouldn't work!
|
209
|
-
a = np.arange(25).reshape(5, 5, order=
|
211
|
+
a = np.arange(25).reshape(5, 5, order="F")
|
210
212
|
s = np.full(fill_value=5, shape=(5,))
|
211
213
|
|
212
214
|
d = cuda.to_device(a)
|
@@ -216,9 +218,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
216
218
|
# (40-byte strides). This means we can't memcpy to it!
|
217
219
|
with self.assertRaises(ValueError) as e:
|
218
220
|
d[2].copy_to_device(s)
|
219
|
-
self.assertEqual(
|
220
|
-
devicearray.errmsg_contiguous_buffer,
|
221
|
-
str(e.exception))
|
221
|
+
self.assertEqual(devicearray.errmsg_contiguous_buffer, str(e.exception))
|
222
222
|
|
223
223
|
# if d[2].copy_to_device(s), then this would pass:
|
224
224
|
# self.assertTrue((a == d.copy_to_host()).all())
|
@@ -236,9 +236,9 @@ class TestCudaNDArray(CUDATestCase):
|
|
236
236
|
(a_c, a_f),
|
237
237
|
(a_c, a_c),
|
238
238
|
]:
|
239
|
-
msg =
|
240
|
-
|
241
|
-
|
239
|
+
msg = "%s => %s" % (
|
240
|
+
"C" if original.flags.c_contiguous else "F",
|
241
|
+
"C" if copy.flags.c_contiguous else "F",
|
242
242
|
)
|
243
243
|
|
244
244
|
d = cuda.to_device(original)
|
@@ -248,17 +248,17 @@ class TestCudaNDArray(CUDATestCase):
|
|
248
248
|
|
249
249
|
def test_devicearray_contiguous_copy_host_3d(self):
|
250
250
|
a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
|
251
|
-
a_f = np.array(a_c, order=
|
251
|
+
a_f = np.array(a_c, order="F")
|
252
252
|
self._test_devicearray_contiguous_host_copy(a_c, a_f)
|
253
253
|
|
254
254
|
def test_devicearray_contiguous_copy_host_1d(self):
|
255
255
|
a_c = np.arange(5)
|
256
|
-
a_f = np.array(a_c, order=
|
256
|
+
a_f = np.array(a_c, order="F")
|
257
257
|
self._test_devicearray_contiguous_host_copy(a_c, a_f)
|
258
258
|
|
259
259
|
def test_devicearray_contiguous_copy_device(self):
|
260
260
|
a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
|
261
|
-
a_f = np.array(a_c, order=
|
261
|
+
a_f = np.array(a_c, order="F")
|
262
262
|
self.assertTrue(a_c.flags.c_contiguous)
|
263
263
|
self.assertTrue(a_f.flags.f_contiguous)
|
264
264
|
|
@@ -268,7 +268,8 @@ class TestCudaNDArray(CUDATestCase):
|
|
268
268
|
d.copy_to_device(cuda.to_device(a_f))
|
269
269
|
self.assertEqual(
|
270
270
|
"incompatible strides: {} vs. {}".format(a_c.strides, a_f.strides),
|
271
|
-
str(e.exception)
|
271
|
+
str(e.exception),
|
272
|
+
)
|
272
273
|
|
273
274
|
d.copy_to_device(cuda.to_device(a_c))
|
274
275
|
self.assertTrue(np.all(d.copy_to_host() == a_c))
|
@@ -279,7 +280,8 @@ class TestCudaNDArray(CUDATestCase):
|
|
279
280
|
d.copy_to_device(cuda.to_device(a_c))
|
280
281
|
self.assertEqual(
|
281
282
|
"incompatible strides: {} vs. {}".format(a_f.strides, a_c.strides),
|
282
|
-
str(e.exception)
|
283
|
+
str(e.exception),
|
284
|
+
)
|
283
285
|
|
284
286
|
d.copy_to_device(cuda.to_device(a_f))
|
285
287
|
self.assertTrue(np.all(d.copy_to_host() == a_f))
|
@@ -288,8 +290,8 @@ class TestCudaNDArray(CUDATestCase):
|
|
288
290
|
broadsize = 4
|
289
291
|
coreshape = (2, 3)
|
290
292
|
coresize = np.prod(coreshape)
|
291
|
-
core_c = np.arange(coresize).reshape(coreshape, order=
|
292
|
-
core_f = np.arange(coresize).reshape(coreshape, order=
|
293
|
+
core_c = np.arange(coresize).reshape(coreshape, order="C")
|
294
|
+
core_f = np.arange(coresize).reshape(coreshape, order="F")
|
293
295
|
for dim in range(len(coreshape)):
|
294
296
|
newindex = (slice(None),) * dim + (np.newaxis,)
|
295
297
|
broadshape = coreshape[:dim] + (broadsize,) + coreshape[dim:]
|
@@ -318,11 +320,9 @@ class TestCudaNDArray(CUDATestCase):
|
|
318
320
|
|
319
321
|
with self.assertRaises(ValueError) as e:
|
320
322
|
d.copy_to_device(cuda.to_device(arr)[::2])
|
321
|
-
self.assertEqual(
|
322
|
-
devicearray.errmsg_contiguous_buffer,
|
323
|
-
str(e.exception))
|
323
|
+
self.assertEqual(devicearray.errmsg_contiguous_buffer, str(e.exception))
|
324
324
|
|
325
|
-
@skip_on_cudasim(
|
325
|
+
@skip_on_cudasim("DeviceNDArray class not present in simulator")
|
326
326
|
def test_devicearray_relaxed_strides(self):
|
327
327
|
# From the reproducer in Issue #6824.
|
328
328
|
|
@@ -334,86 +334,88 @@ class TestCudaNDArray(CUDATestCase):
|
|
334
334
|
|
335
335
|
# Ensure we still believe the array to be contiguous because
|
336
336
|
# strides checking is relaxed.
|
337
|
-
self.assertTrue(arr.flags[
|
338
|
-
self.assertTrue(arr.flags[
|
337
|
+
self.assertTrue(arr.flags["C_CONTIGUOUS"])
|
338
|
+
self.assertTrue(arr.flags["F_CONTIGUOUS"])
|
339
339
|
|
340
340
|
def test_c_f_contiguity_matches_numpy(self):
|
341
341
|
# From the reproducer in Issue #4943.
|
342
342
|
|
343
343
|
shapes = ((1, 4), (4, 1))
|
344
|
-
orders = (
|
344
|
+
orders = ("C", "F")
|
345
345
|
|
346
346
|
for shape, order in itertools.product(shapes, orders):
|
347
347
|
arr = np.ndarray(shape, order=order)
|
348
348
|
d_arr = cuda.to_device(arr)
|
349
|
-
self.assertEqual(
|
350
|
-
|
351
|
-
|
352
|
-
|
349
|
+
self.assertEqual(
|
350
|
+
arr.flags["C_CONTIGUOUS"], d_arr.flags["C_CONTIGUOUS"]
|
351
|
+
)
|
352
|
+
self.assertEqual(
|
353
|
+
arr.flags["F_CONTIGUOUS"], d_arr.flags["F_CONTIGUOUS"]
|
354
|
+
)
|
353
355
|
|
354
|
-
@skip_on_cudasim(
|
356
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
355
357
|
def test_devicearray_typing_order_simple_c(self):
|
356
358
|
# C-order 1D array
|
357
|
-
a = np.zeros(10, order=
|
359
|
+
a = np.zeros(10, order="C")
|
358
360
|
d = cuda.to_device(a)
|
359
|
-
self.assertEqual(d._numba_type_.layout,
|
361
|
+
self.assertEqual(d._numba_type_.layout, "C")
|
360
362
|
|
361
|
-
@skip_on_cudasim(
|
363
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
362
364
|
def test_devicearray_typing_order_simple_f(self):
|
363
365
|
# F-order array that is also C layout.
|
364
|
-
a = np.zeros(10, order=
|
366
|
+
a = np.zeros(10, order="F")
|
365
367
|
d = cuda.to_device(a)
|
366
|
-
self.assertEqual(d._numba_type_.layout,
|
368
|
+
self.assertEqual(d._numba_type_.layout, "C")
|
367
369
|
|
368
|
-
@skip_on_cudasim(
|
370
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
369
371
|
def test_devicearray_typing_order_2d_c(self):
|
370
372
|
# C-order 2D array
|
371
|
-
a = np.zeros((2, 10), order=
|
373
|
+
a = np.zeros((2, 10), order="C")
|
372
374
|
d = cuda.to_device(a)
|
373
|
-
self.assertEqual(d._numba_type_.layout,
|
375
|
+
self.assertEqual(d._numba_type_.layout, "C")
|
374
376
|
|
375
|
-
@skip_on_cudasim(
|
377
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
376
378
|
def test_devicearray_typing_order_2d_f(self):
|
377
379
|
# F-order array that can only be F layout
|
378
|
-
a = np.zeros((2, 10), order=
|
380
|
+
a = np.zeros((2, 10), order="F")
|
379
381
|
d = cuda.to_device(a)
|
380
|
-
self.assertEqual(d._numba_type_.layout,
|
382
|
+
self.assertEqual(d._numba_type_.layout, "F")
|
381
383
|
|
382
|
-
@skip_on_cudasim(
|
384
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
383
385
|
def test_devicearray_typing_order_noncontig_slice_c(self):
|
384
386
|
# Non-contiguous slice of C-order array
|
385
|
-
a = np.zeros((5, 5), order=
|
386
|
-
d = cuda.to_device(a)[:,2]
|
387
|
-
self.assertEqual(d._numba_type_.layout,
|
387
|
+
a = np.zeros((5, 5), order="C")
|
388
|
+
d = cuda.to_device(a)[:, 2]
|
389
|
+
self.assertEqual(d._numba_type_.layout, "A")
|
388
390
|
|
389
|
-
@skip_on_cudasim(
|
391
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
390
392
|
def test_devicearray_typing_order_noncontig_slice_f(self):
|
391
393
|
# Non-contiguous slice of F-order array
|
392
|
-
a = np.zeros((5, 5), order=
|
393
|
-
d = cuda.to_device(a)[2
|
394
|
-
self.assertEqual(d._numba_type_.layout,
|
394
|
+
a = np.zeros((5, 5), order="F")
|
395
|
+
d = cuda.to_device(a)[2, :]
|
396
|
+
self.assertEqual(d._numba_type_.layout, "A")
|
395
397
|
|
396
|
-
@skip_on_cudasim(
|
398
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
397
399
|
def test_devicearray_typing_order_contig_slice_c(self):
|
398
400
|
# Contiguous slice of C-order array
|
399
|
-
a = np.zeros((5, 5), order=
|
400
|
-
d = cuda.to_device(a)[2
|
401
|
-
self.assertEqual(d._numba_type_.layout,
|
401
|
+
a = np.zeros((5, 5), order="C")
|
402
|
+
d = cuda.to_device(a)[2, :]
|
403
|
+
self.assertEqual(d._numba_type_.layout, "C")
|
402
404
|
|
403
|
-
@skip_on_cudasim(
|
405
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
404
406
|
def test_devicearray_typing_order_contig_slice_f(self):
|
405
407
|
# Contiguous slice of F-order array - is both C- and F-contiguous, so
|
406
408
|
# types as 'C' layout
|
407
|
-
a = np.zeros((5, 5), order=
|
408
|
-
d = cuda.to_device(a)[:,2]
|
409
|
-
self.assertEqual(d._numba_type_.layout,
|
409
|
+
a = np.zeros((5, 5), order="F")
|
410
|
+
d = cuda.to_device(a)[:, 2]
|
411
|
+
self.assertEqual(d._numba_type_.layout, "C")
|
410
412
|
|
411
|
-
@skip_on_cudasim(
|
413
|
+
@skip_on_cudasim("Typing not done in the simulator")
|
412
414
|
def test_devicearray_typing_order_broadcasted(self):
|
413
415
|
# Broadcasted array, similar to that used for passing scalars to ufuncs
|
414
416
|
a = np.broadcast_to(np.array([1]), (10,))
|
415
417
|
d = cuda.to_device(a)
|
416
|
-
self.assertEqual(d._numba_type_.layout,
|
418
|
+
self.assertEqual(d._numba_type_.layout, "A")
|
417
419
|
|
418
420
|
def test_bug6697(self):
|
419
421
|
ary = np.arange(10, dtype=np.int16)
|
@@ -421,7 +423,7 @@ class TestCudaNDArray(CUDATestCase):
|
|
421
423
|
got = np.asarray(dary)
|
422
424
|
self.assertEqual(got.dtype, dary.dtype)
|
423
425
|
|
424
|
-
@skip_on_cudasim(
|
426
|
+
@skip_on_cudasim("DeviceNDArray class not present in simulator")
|
425
427
|
def test_issue_8477(self):
|
426
428
|
# Ensure that we can copy a zero-length device array to a zero-length
|
427
429
|
# host array when the strides of the device and host arrays differ -
|
@@ -430,8 +432,9 @@ class TestCudaNDArray(CUDATestCase):
|
|
430
432
|
# https://github.com/numba/numba/issues/8477.
|
431
433
|
|
432
434
|
# Create a device array with shape (0,) and strides (8,)
|
433
|
-
dev_array = devicearray.DeviceNDArray(
|
434
|
-
|
435
|
+
dev_array = devicearray.DeviceNDArray(
|
436
|
+
shape=(0,), strides=(8,), dtype=np.int8
|
437
|
+
)
|
435
438
|
|
436
439
|
# Create a host array with shape (0,) and strides (0,)
|
437
440
|
host_array = np.ndarray(shape=(0,), strides=(0,), dtype=np.int8)
|
@@ -470,8 +473,7 @@ class TestArrayMethod(CUDATestCase):
|
|
470
473
|
dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
|
471
474
|
host_array = np.array(dev_array, dtype=dtype)
|
472
475
|
np.testing.assert_equal(
|
473
|
-
host_array,
|
474
|
-
dev_array.copy_to_host().astype(dtype)
|
476
|
+
host_array, dev_array.copy_to_host().astype(dtype)
|
475
477
|
)
|
476
478
|
|
477
479
|
@unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
|
@@ -490,10 +492,13 @@ class TestArrayMethod(CUDATestCase):
|
|
490
492
|
class TestRecarray(CUDATestCase):
|
491
493
|
def test_recarray(self):
|
492
494
|
# From issue #4111
|
493
|
-
a = np.recarray(
|
494
|
-
(
|
495
|
-
|
496
|
-
|
495
|
+
a = np.recarray(
|
496
|
+
(16,),
|
497
|
+
dtype=[
|
498
|
+
("value1", np.int64),
|
499
|
+
("value2", np.float64),
|
500
|
+
],
|
501
|
+
)
|
497
502
|
a.value1 = np.arange(a.size, dtype=np.int64)
|
498
503
|
a.value2 = np.arange(a.size, dtype=np.float64) / 100
|
499
504
|
|
@@ -518,39 +523,39 @@ class TestCoreContiguous(CUDATestCase):
|
|
518
523
|
def _test_against_array_core(self, view):
|
519
524
|
self.assertEqual(
|
520
525
|
devicearray.is_contiguous(view),
|
521
|
-
devicearray.array_core(view).flags[
|
526
|
+
devicearray.array_core(view).flags["C_CONTIGUOUS"],
|
522
527
|
)
|
523
528
|
|
524
529
|
def test_device_array_like_1d(self):
|
525
|
-
d_a = cuda.device_array(10, order=
|
530
|
+
d_a = cuda.device_array(10, order="C")
|
526
531
|
self._test_against_array_core(d_a)
|
527
532
|
|
528
533
|
def test_device_array_like_2d(self):
|
529
|
-
d_a = cuda.device_array((10, 12), order=
|
534
|
+
d_a = cuda.device_array((10, 12), order="C")
|
530
535
|
self._test_against_array_core(d_a)
|
531
536
|
|
532
537
|
def test_device_array_like_2d_transpose(self):
|
533
|
-
d_a = cuda.device_array((10, 12), order=
|
538
|
+
d_a = cuda.device_array((10, 12), order="C")
|
534
539
|
self._test_against_array_core(d_a.T)
|
535
540
|
|
536
541
|
def test_device_array_like_3d(self):
|
537
|
-
d_a = cuda.device_array((10, 12, 14), order=
|
542
|
+
d_a = cuda.device_array((10, 12, 14), order="C")
|
538
543
|
self._test_against_array_core(d_a)
|
539
544
|
|
540
545
|
def test_device_array_like_1d_f(self):
|
541
|
-
d_a = cuda.device_array(10, order=
|
546
|
+
d_a = cuda.device_array(10, order="F")
|
542
547
|
self._test_against_array_core(d_a)
|
543
548
|
|
544
549
|
def test_device_array_like_2d_f(self):
|
545
|
-
d_a = cuda.device_array((10, 12), order=
|
550
|
+
d_a = cuda.device_array((10, 12), order="F")
|
546
551
|
self._test_against_array_core(d_a)
|
547
552
|
|
548
553
|
def test_device_array_like_2d_f_transpose(self):
|
549
|
-
d_a = cuda.device_array((10, 12), order=
|
554
|
+
d_a = cuda.device_array((10, 12), order="F")
|
550
555
|
self._test_against_array_core(d_a.T)
|
551
556
|
|
552
557
|
def test_device_array_like_3d_f(self):
|
553
|
-
d_a = cuda.device_array((10, 12, 14), order=
|
558
|
+
d_a = cuda.device_array((10, 12, 14), order="F")
|
554
559
|
self._test_against_array_core(d_a)
|
555
560
|
|
556
561
|
def test_1d_view(self):
|
@@ -560,7 +565,7 @@ class TestCoreContiguous(CUDATestCase):
|
|
560
565
|
|
561
566
|
def test_1d_view_f(self):
|
562
567
|
shape = 10
|
563
|
-
view = np.zeros(shape, order=
|
568
|
+
view = np.zeros(shape, order="F")[::2]
|
564
569
|
self._test_against_array_core(view)
|
565
570
|
|
566
571
|
def test_2d_view(self):
|
@@ -570,9 +575,9 @@ class TestCoreContiguous(CUDATestCase):
|
|
570
575
|
|
571
576
|
def test_2d_view_f(self):
|
572
577
|
shape = (10, 12)
|
573
|
-
view = np.zeros(shape, order=
|
578
|
+
view = np.zeros(shape, order="F")[::2, ::2]
|
574
579
|
self._test_against_array_core(view)
|
575
580
|
|
576
581
|
|
577
|
-
if __name__ ==
|
582
|
+
if __name__ == "__main__":
|
578
583
|
unittest.main()
|
@@ -3,14 +3,18 @@ from contextlib import contextmanager
|
|
3
3
|
import numpy as np
|
4
4
|
|
5
5
|
from numba import cuda
|
6
|
-
from numba.cuda.testing import (
|
7
|
-
|
6
|
+
from numba.cuda.testing import (
|
7
|
+
unittest,
|
8
|
+
skip_on_cudasim,
|
9
|
+
skip_if_external_memmgr,
|
10
|
+
CUDATestCase,
|
11
|
+
)
|
8
12
|
from numba.tests.support import captured_stderr
|
9
13
|
from numba.core import config
|
10
14
|
|
11
15
|
|
12
|
-
@skip_on_cudasim(
|
13
|
-
@skip_if_external_memmgr(
|
16
|
+
@skip_on_cudasim("not supported on CUDASIM")
|
17
|
+
@skip_if_external_memmgr("Deallocation specific to Numba memory management")
|
14
18
|
class TestDeallocation(CUDATestCase):
|
15
19
|
def test_max_pending_count(self):
|
16
20
|
# get deallocation manager and flush it
|
@@ -41,8 +45,9 @@ class TestDeallocation(CUDATestCase):
|
|
41
45
|
config.CUDA_DEALLOCS_RATIO = max_pending / mi.total
|
42
46
|
# due to round off error (floor is used in calculating
|
43
47
|
# _max_pending_bytes) it can be off by 1.
|
44
|
-
self.assertAlmostEqual(
|
45
|
-
|
48
|
+
self.assertAlmostEqual(
|
49
|
+
deallocs._max_pending_bytes, max_pending, delta=1
|
50
|
+
)
|
46
51
|
|
47
52
|
# allocate half the max size
|
48
53
|
# this will not trigger deallocation
|
@@ -51,8 +56,11 @@ class TestDeallocation(CUDATestCase):
|
|
51
56
|
|
52
57
|
# allocate another remaining
|
53
58
|
# this will not trigger deallocation
|
54
|
-
cuda.to_device(
|
55
|
-
|
59
|
+
cuda.to_device(
|
60
|
+
np.ones(
|
61
|
+
deallocs._max_pending_bytes - deallocs._size, dtype=np.int8
|
62
|
+
)
|
63
|
+
)
|
56
64
|
self.assertEqual(len(deallocs), 2)
|
57
65
|
|
58
66
|
# another byte to trigger .clear()
|
@@ -64,7 +72,7 @@ class TestDeallocation(CUDATestCase):
|
|
64
72
|
|
65
73
|
|
66
74
|
@skip_on_cudasim("defer_cleanup has no effect in CUDASIM")
|
67
|
-
@skip_if_external_memmgr(
|
75
|
+
@skip_if_external_memmgr("Deallocation specific to Numba memory management")
|
68
76
|
class TestDeferCleanup(CUDATestCase):
|
69
77
|
def test_basic(self):
|
70
78
|
harr = np.arange(5)
|
@@ -138,11 +146,12 @@ class TestDeferCleanupAvail(CUDATestCase):
|
|
138
146
|
pass
|
139
147
|
|
140
148
|
|
141
|
-
@skip_on_cudasim(
|
149
|
+
@skip_on_cudasim("not supported on CUDASIM")
|
142
150
|
class TestDel(CUDATestCase):
|
143
151
|
"""
|
144
152
|
Ensure resources are deleted properly without ignored exception.
|
145
153
|
"""
|
154
|
+
|
146
155
|
@contextmanager
|
147
156
|
def check_ignored_exception(self, ctx):
|
148
157
|
with captured_stderr() as cap:
|
@@ -245,5 +254,5 @@ class TestDel(CUDATestCase):
|
|
245
254
|
pass
|
246
255
|
|
247
256
|
|
248
|
-
if __name__ ==
|
257
|
+
if __name__ == "__main__":
|
249
258
|
unittest.main()
|