numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -12,9 +12,11 @@ from numba.tests.support import override_config
|
|
12
12
|
|
13
13
|
|
14
14
|
def _get_matmulcore_gufunc(dtype=float32):
|
15
|
-
@guvectorize(
|
16
|
-
|
17
|
-
|
15
|
+
@guvectorize(
|
16
|
+
[void(dtype[:, :], dtype[:, :], dtype[:, :])],
|
17
|
+
"(m,n),(n,p)->(m,p)",
|
18
|
+
target="cuda",
|
19
|
+
)
|
18
20
|
def matmulcore(A, B, C):
|
19
21
|
m, n = A.shape
|
20
22
|
n, p = B.shape
|
@@ -27,32 +29,33 @@ def _get_matmulcore_gufunc(dtype=float32):
|
|
27
29
|
return matmulcore
|
28
30
|
|
29
31
|
|
30
|
-
@skip_on_cudasim(
|
32
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
31
33
|
class TestCUDAGufunc(CUDATestCase):
|
32
|
-
|
33
34
|
def test_gufunc_small(self):
|
34
|
-
|
35
35
|
gufunc = _get_matmulcore_gufunc()
|
36
36
|
|
37
37
|
matrix_ct = 2
|
38
|
-
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
39
|
-
|
40
|
-
|
41
|
-
|
38
|
+
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
39
|
+
matrix_ct, 2, 4
|
40
|
+
)
|
41
|
+
B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
|
42
|
+
matrix_ct, 4, 5
|
43
|
+
)
|
42
44
|
|
43
45
|
C = gufunc(A, B)
|
44
46
|
Gold = np.matmul(A, B)
|
45
47
|
self.assertTrue(np.allclose(C, Gold))
|
46
48
|
|
47
49
|
def test_gufunc_auto_transfer(self):
|
48
|
-
|
49
50
|
gufunc = _get_matmulcore_gufunc()
|
50
51
|
|
51
52
|
matrix_ct = 2
|
52
|
-
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
54
|
+
matrix_ct, 2, 4
|
55
|
+
)
|
56
|
+
B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
|
57
|
+
matrix_ct, 4, 5
|
58
|
+
)
|
56
59
|
|
57
60
|
dB = cuda.to_device(B)
|
58
61
|
|
@@ -61,24 +64,24 @@ class TestCUDAGufunc(CUDATestCase):
|
|
61
64
|
self.assertTrue(np.allclose(C, Gold))
|
62
65
|
|
63
66
|
def test_gufunc(self):
|
64
|
-
|
65
67
|
gufunc = _get_matmulcore_gufunc()
|
66
68
|
|
67
|
-
matrix_ct = 1001
|
68
|
-
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
69
|
-
|
70
|
-
|
71
|
-
|
69
|
+
matrix_ct = 1001 # an odd number to test thread/block division in CUDA
|
70
|
+
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
71
|
+
matrix_ct, 2, 4
|
72
|
+
)
|
73
|
+
B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
|
74
|
+
matrix_ct, 4, 5
|
75
|
+
)
|
72
76
|
|
73
77
|
C = gufunc(A, B)
|
74
78
|
Gold = np.matmul(A, B)
|
75
79
|
self.assertTrue(np.allclose(C, Gold))
|
76
80
|
|
77
81
|
def test_gufunc_hidim(self):
|
78
|
-
|
79
82
|
gufunc = _get_matmulcore_gufunc()
|
80
83
|
|
81
|
-
matrix_ct = 100
|
84
|
+
matrix_ct = 100 # an odd number to test thread/block division in CUDA
|
82
85
|
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(4, 25, 2, 4)
|
83
86
|
B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(4, 25, 4, 5)
|
84
87
|
|
@@ -87,7 +90,6 @@ class TestCUDAGufunc(CUDATestCase):
|
|
87
90
|
self.assertTrue(np.allclose(C, Gold))
|
88
91
|
|
89
92
|
def test_gufunc_new_axis(self):
|
90
|
-
|
91
93
|
gufunc = _get_matmulcore_gufunc(dtype=float64)
|
92
94
|
|
93
95
|
X = np.random.randn(10, 3, 3)
|
@@ -102,15 +104,16 @@ class TestCUDAGufunc(CUDATestCase):
|
|
102
104
|
np.testing.assert_allclose(gold, res2)
|
103
105
|
|
104
106
|
def test_gufunc_stream(self):
|
105
|
-
|
106
107
|
gufunc = _get_matmulcore_gufunc()
|
107
108
|
|
108
|
-
#cuda.driver.flush_pending_free()
|
109
|
-
matrix_ct = 1001
|
110
|
-
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
111
|
-
|
112
|
-
|
113
|
-
|
109
|
+
# cuda.driver.flush_pending_free()
|
110
|
+
matrix_ct = 1001 # an odd number to test thread/block division in CUDA
|
111
|
+
A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
|
112
|
+
matrix_ct, 2, 4
|
113
|
+
)
|
114
|
+
B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
|
115
|
+
matrix_ct, 4, 5
|
116
|
+
)
|
114
117
|
|
115
118
|
stream = cuda.stream()
|
116
119
|
dA = cuda.to_device(A, stream)
|
@@ -126,10 +129,7 @@ class TestCUDAGufunc(CUDATestCase):
|
|
126
129
|
self.assertTrue(np.allclose(C, Gold))
|
127
130
|
|
128
131
|
def test_copy(self):
|
129
|
-
|
130
|
-
@guvectorize([void(float32[:], float32[:])],
|
131
|
-
'(x)->(x)',
|
132
|
-
target='cuda')
|
132
|
+
@guvectorize([void(float32[:], float32[:])], "(x)->(x)", target="cuda")
|
133
133
|
def copy(A, B):
|
134
134
|
for i in range(B.size):
|
135
135
|
B[i] = A[i]
|
@@ -142,9 +142,7 @@ class TestCUDAGufunc(CUDATestCase):
|
|
142
142
|
def test_copy_unspecified_return(self):
|
143
143
|
# Ensure that behaviour is correct when the return type is not
|
144
144
|
# specified in the signature.
|
145
|
-
@guvectorize([(float32[:], float32[:])],
|
146
|
-
'(x)->(x)',
|
147
|
-
target='cuda')
|
145
|
+
@guvectorize([(float32[:], float32[:])], "(x)->(x)", target="cuda")
|
148
146
|
def copy(A, B):
|
149
147
|
for i in range(B.size):
|
150
148
|
B[i] = A[i]
|
@@ -155,10 +153,7 @@ class TestCUDAGufunc(CUDATestCase):
|
|
155
153
|
self.assertTrue(np.allclose(A, B))
|
156
154
|
|
157
155
|
def test_copy_odd(self):
|
158
|
-
|
159
|
-
@guvectorize([void(float32[:], float32[:])],
|
160
|
-
'(x)->(x)',
|
161
|
-
target='cuda')
|
156
|
+
@guvectorize([void(float32[:], float32[:])], "(x)->(x)", target="cuda")
|
162
157
|
def copy(A, B):
|
163
158
|
for i in range(B.size):
|
164
159
|
B[i] = A[i]
|
@@ -169,10 +164,11 @@ class TestCUDAGufunc(CUDATestCase):
|
|
169
164
|
self.assertTrue(np.allclose(A, B))
|
170
165
|
|
171
166
|
def test_copy2d(self):
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
167
|
+
@guvectorize(
|
168
|
+
[void(float32[:, :], float32[:, :])],
|
169
|
+
"(x, y)->(x, y)",
|
170
|
+
target="cuda",
|
171
|
+
)
|
176
172
|
def copy2d(A, B):
|
177
173
|
for x in range(B.shape[0]):
|
178
174
|
for y in range(B.shape[1]):
|
@@ -185,8 +181,7 @@ class TestCUDAGufunc(CUDATestCase):
|
|
185
181
|
|
186
182
|
def test_not_supported_call_from_jit(self):
|
187
183
|
# not supported
|
188
|
-
@guvectorize([void(int32[:], int32[:])],
|
189
|
-
'(n)->(n)', target='cuda')
|
184
|
+
@guvectorize([void(int32[:], int32[:])], "(n)->(n)", target="cuda")
|
190
185
|
def gufunc_copy(A, b):
|
191
186
|
for i in range(A.shape[0]):
|
192
187
|
b[i] = A[i]
|
@@ -195,7 +190,7 @@ class TestCUDAGufunc(CUDATestCase):
|
|
195
190
|
def cuda_jit(A, b):
|
196
191
|
return gufunc_copy(A, b)
|
197
192
|
|
198
|
-
A = np.arange(1024 * 32).astype(
|
193
|
+
A = np.arange(1024 * 32).astype("int32")
|
199
194
|
b = np.zeros_like(A)
|
200
195
|
msg = "Untyped global name 'gufunc_copy'.*"
|
201
196
|
with self.assertRaisesRegex(TypingError, msg):
|
@@ -204,56 +199,68 @@ class TestCUDAGufunc(CUDATestCase):
|
|
204
199
|
# Test inefficient use of the GPU where the inputs are all mapped onto a
|
205
200
|
# single thread in a single block.
|
206
201
|
def test_inefficient_launch_configuration(self):
|
207
|
-
@guvectorize(
|
208
|
-
|
202
|
+
@guvectorize(
|
203
|
+
["void(float32[:], float32[:], float32[:])"],
|
204
|
+
"(n),(n)->(n)",
|
205
|
+
target="cuda",
|
206
|
+
)
|
209
207
|
def numba_dist_cuda(a, b, dist):
|
210
208
|
len = a.shape[0]
|
211
209
|
for i in range(len):
|
212
210
|
dist[i] = a[i] * b[i]
|
213
211
|
|
214
|
-
a = np.random.rand(1024 * 32).astype(
|
215
|
-
b = np.random.rand(1024 * 32).astype(
|
216
|
-
dist = np.zeros(a.shape[0]).astype(
|
212
|
+
a = np.random.rand(1024 * 32).astype("float32")
|
213
|
+
b = np.random.rand(1024 * 32).astype("float32")
|
214
|
+
dist = np.zeros(a.shape[0]).astype("float32")
|
217
215
|
|
218
|
-
with override_config(
|
216
|
+
with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
|
219
217
|
with warnings.catch_warnings(record=True) as w:
|
220
218
|
numba_dist_cuda(a, b, dist)
|
221
219
|
self.assertEqual(w[0].category, NumbaPerformanceWarning)
|
222
|
-
self.assertIn(
|
223
|
-
self.assertIn(
|
220
|
+
self.assertIn("Grid size", str(w[0].message))
|
221
|
+
self.assertIn("low occupancy", str(w[0].message))
|
224
222
|
|
225
223
|
def test_efficient_launch_configuration(self):
|
226
|
-
@guvectorize(
|
227
|
-
|
224
|
+
@guvectorize(
|
225
|
+
["void(float32[:], float32[:], float32[:])"],
|
226
|
+
"(n),(n)->(n)",
|
227
|
+
nopython=True,
|
228
|
+
target="cuda",
|
229
|
+
)
|
228
230
|
def numba_dist_cuda2(a, b, dist):
|
229
231
|
len = a.shape[0]
|
230
232
|
for i in range(len):
|
231
233
|
dist[i] = a[i] * b[i]
|
232
234
|
|
233
|
-
a = np.random.rand(524288 * 2).astype(
|
234
|
-
|
235
|
-
b = np.random.rand(524288 * 2).astype('float32').\
|
236
|
-
reshape((524288, 2))
|
235
|
+
a = np.random.rand(524288 * 2).astype("float32").reshape((524288, 2))
|
236
|
+
b = np.random.rand(524288 * 2).astype("float32").reshape((524288, 2))
|
237
237
|
dist = np.zeros_like(a)
|
238
238
|
|
239
|
-
with override_config(
|
239
|
+
with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
|
240
240
|
with warnings.catch_warnings(record=True) as w:
|
241
241
|
numba_dist_cuda2(a, b, dist)
|
242
242
|
self.assertEqual(len(w), 0)
|
243
243
|
|
244
244
|
def test_nopython_flag(self):
|
245
|
-
|
246
245
|
def foo(A, B):
|
247
246
|
pass
|
248
247
|
|
249
248
|
# nopython = True is fine
|
250
|
-
guvectorize(
|
251
|
-
|
249
|
+
guvectorize(
|
250
|
+
[void(float32[:], float32[:])],
|
251
|
+
"(x)->(x)",
|
252
|
+
target="cuda",
|
253
|
+
nopython=True,
|
254
|
+
)(foo)
|
252
255
|
|
253
256
|
# nopython = False is bad
|
254
257
|
with self.assertRaises(TypeError) as raises:
|
255
|
-
guvectorize(
|
256
|
-
|
258
|
+
guvectorize(
|
259
|
+
[void(float32[:], float32[:])],
|
260
|
+
"(x)->(x)",
|
261
|
+
target="cuda",
|
262
|
+
nopython=False,
|
263
|
+
)(foo)
|
257
264
|
self.assertEqual("nopython flag must be True", str(raises.exception))
|
258
265
|
|
259
266
|
def test_invalid_flags(self):
|
@@ -262,17 +269,22 @@ class TestCUDAGufunc(CUDATestCase):
|
|
262
269
|
pass
|
263
270
|
|
264
271
|
with self.assertRaises(TypeError) as raises:
|
265
|
-
guvectorize(
|
266
|
-
|
272
|
+
guvectorize(
|
273
|
+
[void(float32[:], float32[:])],
|
274
|
+
"(x)->(x)",
|
275
|
+
target="cuda",
|
276
|
+
what1=True,
|
277
|
+
ever2=False,
|
278
|
+
)(foo)
|
267
279
|
head = "The following target options are not supported:"
|
268
280
|
msg = str(raises.exception)
|
269
|
-
self.assertEqual(msg[:len(head)], head)
|
270
|
-
items = msg[len(head):].strip().split(
|
281
|
+
self.assertEqual(msg[: len(head)], head)
|
282
|
+
items = msg[len(head) :].strip().split(",")
|
271
283
|
items = [i.strip("'\" ") for i in items]
|
272
|
-
self.assertEqual(set([
|
284
|
+
self.assertEqual(set(["what1", "ever2"]), set(items))
|
273
285
|
|
274
286
|
def test_duplicated_output(self):
|
275
|
-
@guvectorize([void(float32[:], float32[:])],
|
287
|
+
@guvectorize([void(float32[:], float32[:])], "(x)->(x)", target="cuda")
|
276
288
|
def foo(inp, out):
|
277
289
|
pass # intentionally empty; never executed
|
278
290
|
|
@@ -284,8 +296,9 @@ class TestCUDAGufunc(CUDATestCase):
|
|
284
296
|
self.assertEqual(str(raises.exception), msg)
|
285
297
|
|
286
298
|
def check_tuple_arg(self, a, b):
|
287
|
-
@guvectorize(
|
288
|
-
|
299
|
+
@guvectorize(
|
300
|
+
[(float64[:], float64[:], float64[:])], "(n),(n)->()", target="cuda"
|
301
|
+
)
|
289
302
|
def gu_reduce(x, y, r):
|
290
303
|
s = 0
|
291
304
|
for i in range(len(x)):
|
@@ -297,44 +310,40 @@ class TestCUDAGufunc(CUDATestCase):
|
|
297
310
|
np.testing.assert_equal(expected, r)
|
298
311
|
|
299
312
|
def test_tuple_of_tuple_arg(self):
|
300
|
-
a = ((1.0, 2.0, 3.0),
|
301
|
-
|
302
|
-
b = ((1.5, 2.5, 3.5),
|
303
|
-
(4.5, 5.5, 6.5))
|
313
|
+
a = ((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
|
314
|
+
b = ((1.5, 2.5, 3.5), (4.5, 5.5, 6.5))
|
304
315
|
self.check_tuple_arg(a, b)
|
305
316
|
|
306
317
|
def test_tuple_of_namedtuple_arg(self):
|
307
|
-
Point = namedtuple(
|
308
|
-
a = (Point(x=1.0, y=2.0, z=3.0),
|
309
|
-
|
310
|
-
b = (Point(x=1.5, y=2.5, z=3.5),
|
311
|
-
Point(x=4.5, y=5.5, z=6.5))
|
318
|
+
Point = namedtuple("Point", ("x", "y", "z"))
|
319
|
+
a = (Point(x=1.0, y=2.0, z=3.0), Point(x=4.0, y=5.0, z=6.0))
|
320
|
+
b = (Point(x=1.5, y=2.5, z=3.5), Point(x=4.5, y=5.5, z=6.5))
|
312
321
|
self.check_tuple_arg(a, b)
|
313
322
|
|
314
323
|
def test_tuple_of_array_arg(self):
|
315
|
-
a = (np.asarray((1.0, 2.0, 3.0)),
|
316
|
-
|
317
|
-
b = (np.asarray((1.5, 2.5, 3.5)),
|
318
|
-
np.asarray((4.5, 5.5, 6.5)))
|
324
|
+
a = (np.asarray((1.0, 2.0, 3.0)), np.asarray((4.0, 5.0, 6.0)))
|
325
|
+
b = (np.asarray((1.5, 2.5, 3.5)), np.asarray((4.5, 5.5, 6.5)))
|
319
326
|
self.check_tuple_arg(a, b)
|
320
327
|
|
321
328
|
def test_gufunc_name(self):
|
322
329
|
gufunc = _get_matmulcore_gufunc()
|
323
|
-
self.assertEqual(gufunc.__name__,
|
330
|
+
self.assertEqual(gufunc.__name__, "matmulcore")
|
324
331
|
|
325
332
|
def test_bad_return_type(self):
|
326
333
|
with self.assertRaises(TypeError) as te:
|
327
|
-
|
334
|
+
|
335
|
+
@guvectorize([int32(int32[:], int32[:])], "(m)->(m)", target="cuda")
|
328
336
|
def f(x, y):
|
329
337
|
pass
|
330
338
|
|
331
339
|
msg = str(te.exception)
|
332
|
-
self.assertIn(
|
333
|
-
self.assertIn(
|
340
|
+
self.assertIn("guvectorized functions cannot return values", msg)
|
341
|
+
self.assertIn("specifies int32 return type", msg)
|
334
342
|
|
335
343
|
def test_incorrect_number_of_pos_args(self):
|
336
|
-
@guvectorize(
|
337
|
-
|
344
|
+
@guvectorize(
|
345
|
+
[(int32[:], int32[:], int32[:])], "(m),(m)->(m)", target="cuda"
|
346
|
+
)
|
338
347
|
def f(x, y, z):
|
339
348
|
pass
|
340
349
|
|
@@ -345,26 +354,28 @@ class TestCUDAGufunc(CUDATestCase):
|
|
345
354
|
f(arr)
|
346
355
|
|
347
356
|
msg = str(te.exception)
|
348
|
-
self.assertIn(
|
349
|
-
self.assertIn(
|
350
|
-
self.assertIn(
|
357
|
+
self.assertIn("gufunc accepts 2 positional arguments", msg)
|
358
|
+
self.assertIn("or 3 positional arguments", msg)
|
359
|
+
self.assertIn("Got 1 positional argument.", msg)
|
351
360
|
|
352
361
|
# Inputs and outputs, too many
|
353
362
|
with self.assertRaises(TypeError) as te:
|
354
363
|
f(arr, arr, arr, arr)
|
355
364
|
|
356
365
|
msg = str(te.exception)
|
357
|
-
self.assertIn(
|
358
|
-
self.assertIn(
|
359
|
-
self.assertIn(
|
366
|
+
self.assertIn("gufunc accepts 2 positional arguments", msg)
|
367
|
+
self.assertIn("or 3 positional arguments", msg)
|
368
|
+
self.assertIn("Got 4 positional arguments.", msg)
|
360
369
|
|
361
370
|
|
362
|
-
@skip_on_cudasim(
|
371
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
363
372
|
class TestMultipleOutputs(CUDATestCase):
|
364
373
|
def test_multiple_outputs_same_type_passed_in(self):
|
365
|
-
@guvectorize(
|
366
|
-
|
367
|
-
|
374
|
+
@guvectorize(
|
375
|
+
[void(float32[:], float32[:], float32[:])],
|
376
|
+
"(x)->(x),(x)",
|
377
|
+
target="cuda",
|
378
|
+
)
|
368
379
|
def copy(A, B, C):
|
369
380
|
for i in range(B.size):
|
370
381
|
B[i] = A[i]
|
@@ -378,10 +389,11 @@ class TestMultipleOutputs(CUDATestCase):
|
|
378
389
|
np.testing.assert_allclose(A, C)
|
379
390
|
|
380
391
|
def test_multiple_outputs_distinct_values(self):
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
392
|
+
@guvectorize(
|
393
|
+
[void(float32[:], float32[:], float32[:])],
|
394
|
+
"(x)->(x),(x)",
|
395
|
+
target="cuda",
|
396
|
+
)
|
385
397
|
def copy_and_double(A, B, C):
|
386
398
|
for i in range(B.size):
|
387
399
|
B[i] = A[i]
|
@@ -395,9 +407,11 @@ class TestMultipleOutputs(CUDATestCase):
|
|
395
407
|
np.testing.assert_allclose(A * 2, C)
|
396
408
|
|
397
409
|
def test_multiple_output_allocation(self):
|
398
|
-
@guvectorize(
|
399
|
-
|
400
|
-
|
410
|
+
@guvectorize(
|
411
|
+
[void(float32[:], float32[:], float32[:])],
|
412
|
+
"(x)->(x),(x)",
|
413
|
+
target="cuda",
|
414
|
+
)
|
401
415
|
def copy_and_double(A, B, C):
|
402
416
|
for i in range(B.size):
|
403
417
|
B[i] = A[i]
|
@@ -409,10 +423,11 @@ class TestMultipleOutputs(CUDATestCase):
|
|
409
423
|
np.testing.assert_allclose(A * 2, C)
|
410
424
|
|
411
425
|
def test_multiple_output_dtypes(self):
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
426
|
+
@guvectorize(
|
427
|
+
[void(int32[:], int32[:], float64[:])],
|
428
|
+
"(x)->(x),(x)",
|
429
|
+
target="cuda",
|
430
|
+
)
|
416
431
|
def copy_and_multiply(A, B, C):
|
417
432
|
for i in range(B.size):
|
418
433
|
B[i] = A[i]
|
@@ -426,8 +441,11 @@ class TestMultipleOutputs(CUDATestCase):
|
|
426
441
|
np.testing.assert_allclose(A * np.float64(1.5), C)
|
427
442
|
|
428
443
|
def test_incorrect_number_of_pos_args(self):
|
429
|
-
@guvectorize(
|
430
|
-
|
444
|
+
@guvectorize(
|
445
|
+
[(int32[:], int32[:], int32[:], int32[:])],
|
446
|
+
"(m),(m)->(m),(m)",
|
447
|
+
target="cuda",
|
448
|
+
)
|
431
449
|
def f(x, y, z, w):
|
432
450
|
pass
|
433
451
|
|
@@ -438,19 +456,19 @@ class TestMultipleOutputs(CUDATestCase):
|
|
438
456
|
f(arr)
|
439
457
|
|
440
458
|
msg = str(te.exception)
|
441
|
-
self.assertIn(
|
442
|
-
self.assertIn(
|
443
|
-
self.assertIn(
|
459
|
+
self.assertIn("gufunc accepts 2 positional arguments", msg)
|
460
|
+
self.assertIn("or 4 positional arguments", msg)
|
461
|
+
self.assertIn("Got 1 positional argument.", msg)
|
444
462
|
|
445
463
|
# Inputs and outputs, too many
|
446
464
|
with self.assertRaises(TypeError) as te:
|
447
465
|
f(arr, arr, arr, arr, arr)
|
448
466
|
|
449
467
|
msg = str(te.exception)
|
450
|
-
self.assertIn(
|
451
|
-
self.assertIn(
|
452
|
-
self.assertIn(
|
468
|
+
self.assertIn("gufunc accepts 2 positional arguments", msg)
|
469
|
+
self.assertIn("or 4 positional arguments", msg)
|
470
|
+
self.assertIn("Got 5 positional arguments.", msg)
|
453
471
|
|
454
472
|
|
455
|
-
if __name__ ==
|
473
|
+
if __name__ == "__main__":
|
456
474
|
unittest.main()
|
@@ -3,13 +3,14 @@
|
|
3
3
|
See Numpy documentation for detail about gufunc:
|
4
4
|
http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
|
5
5
|
"""
|
6
|
+
|
6
7
|
import numpy as np
|
7
8
|
from numba import guvectorize, cuda
|
8
9
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
9
10
|
import unittest
|
10
11
|
|
11
12
|
|
12
|
-
@skip_on_cudasim(
|
13
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
13
14
|
class TestGUFuncScalar(CUDATestCase):
|
14
15
|
def test_gufunc_scalar_output(self):
|
15
16
|
# function type:
|
@@ -20,9 +21,9 @@ class TestGUFuncScalar(CUDATestCase):
|
|
20
21
|
# signature: (n)->()
|
21
22
|
# - the function takes an array of n-element and output a scalar.
|
22
23
|
|
23
|
-
@guvectorize([
|
24
|
+
@guvectorize(["void(int32[:], int32[:])"], "(n)->()", target="cuda")
|
24
25
|
def sum_row(inp, out):
|
25
|
-
tmp = 0.
|
26
|
+
tmp = 0.0
|
26
27
|
for i in range(inp.shape[0]):
|
27
28
|
tmp += inp[i]
|
28
29
|
out[0] = tmp
|
@@ -38,15 +39,14 @@ class TestGUFuncScalar(CUDATestCase):
|
|
38
39
|
out1 = np.empty(100, dtype=inp.dtype)
|
39
40
|
out2 = np.empty(100, dtype=inp.dtype)
|
40
41
|
|
41
|
-
dev_inp = cuda.to_device(
|
42
|
-
|
43
|
-
dev_out1 = cuda.to_device(out1, copy=False) # alloc only
|
42
|
+
dev_inp = cuda.to_device(inp) # alloc and copy input data
|
43
|
+
dev_out1 = cuda.to_device(out1, copy=False) # alloc only
|
44
44
|
|
45
|
-
sum_row(dev_inp, out=dev_out1)
|
46
|
-
dev_out2 = sum_row(dev_inp)
|
45
|
+
sum_row(dev_inp, out=dev_out1) # invoke the gufunc
|
46
|
+
dev_out2 = sum_row(dev_inp) # invoke the gufunc
|
47
47
|
|
48
|
-
dev_out1.copy_to_host(out1)
|
49
|
-
dev_out2.copy_to_host(out2)
|
48
|
+
dev_out1.copy_to_host(out1) # retrieve the result
|
49
|
+
dev_out2.copy_to_host(out2) # retrieve the result
|
50
50
|
|
51
51
|
# verify result
|
52
52
|
for i in range(inp.shape[0]):
|
@@ -55,7 +55,7 @@ class TestGUFuncScalar(CUDATestCase):
|
|
55
55
|
|
56
56
|
def test_gufunc_scalar_output_bug(self):
|
57
57
|
# Issue 2812: Error due to using input argument types as output argument
|
58
|
-
@guvectorize([
|
58
|
+
@guvectorize(["void(int32, int32[:])"], "()->()", target="cuda")
|
59
59
|
def twice(inp, out):
|
60
60
|
out[0] = inp * 2
|
61
61
|
|
@@ -64,8 +64,11 @@ class TestGUFuncScalar(CUDATestCase):
|
|
64
64
|
self.assertPreciseEqual(twice(arg), arg * 2)
|
65
65
|
|
66
66
|
def test_gufunc_scalar_input_saxpy(self):
|
67
|
-
@guvectorize(
|
68
|
-
|
67
|
+
@guvectorize(
|
68
|
+
["void(float32, float32[:], float32[:], float32[:])"],
|
69
|
+
"(),(t),(t)->(t)",
|
70
|
+
target="cuda",
|
71
|
+
)
|
69
72
|
def saxpy(a, x, y, out):
|
70
73
|
for i in range(out.shape[0]):
|
71
74
|
out[i] = a * x[i] + y[i]
|
@@ -99,8 +102,9 @@ class TestGUFuncScalar(CUDATestCase):
|
|
99
102
|
self.assertTrue(exp == out[j, i], (exp, out[j, i]))
|
100
103
|
|
101
104
|
def test_gufunc_scalar_cast(self):
|
102
|
-
@guvectorize(
|
103
|
-
|
105
|
+
@guvectorize(
|
106
|
+
["void(int32, int32[:], int32[:])"], "(),(t)->(t)", target="cuda"
|
107
|
+
)
|
104
108
|
def foo(a, b, out):
|
105
109
|
for i in range(b.size):
|
106
110
|
out[i] = a * b[i]
|
@@ -121,8 +125,9 @@ class TestGUFuncScalar(CUDATestCase):
|
|
121
125
|
|
122
126
|
def test_gufunc_old_style_scalar_as_array(self):
|
123
127
|
# Example from issue #2579
|
124
|
-
@guvectorize(
|
125
|
-
|
128
|
+
@guvectorize(
|
129
|
+
["void(int32[:],int32[:],int32[:])"], "(n),()->(n)", target="cuda"
|
130
|
+
)
|
126
131
|
def gufunc(x, y, res):
|
127
132
|
for i in range(x.shape[0]):
|
128
133
|
res[i] = x[i] + y[0]
|
@@ -155,5 +160,5 @@ class TestGUFuncScalar(CUDATestCase):
|
|
155
160
|
np.testing.assert_almost_equal(expected, res)
|
156
161
|
|
157
162
|
|
158
|
-
if __name__ ==
|
163
|
+
if __name__ == "__main__":
|
159
164
|
unittest.main()
|