numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,125 @@
|
|
1
|
+
from numba import cuda
|
2
|
+
from numba.core.errors import TypingError
|
3
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
4
|
+
import numpy as np
|
5
|
+
import unittest
|
6
|
+
|
7
|
+
|
8
|
+
class TestSelfRecursion(CUDATestCase):
|
9
|
+
|
10
|
+
def setUp(self):
|
11
|
+
# Avoid importing this module at the top level, as it triggers
|
12
|
+
# compilation and can therefore fail
|
13
|
+
from numba.cuda.tests.cudapy import recursion_usecases
|
14
|
+
self.mod = recursion_usecases
|
15
|
+
super().setUp()
|
16
|
+
|
17
|
+
def check_fib(self, cfunc):
|
18
|
+
@cuda.jit
|
19
|
+
def kernel(r, x):
|
20
|
+
r[0] = cfunc(x[0])
|
21
|
+
|
22
|
+
x = np.asarray([10], dtype=np.int64)
|
23
|
+
r = np.zeros_like(x)
|
24
|
+
kernel[1, 1](r, x)
|
25
|
+
|
26
|
+
actual = r[0]
|
27
|
+
expected = 55
|
28
|
+
self.assertPreciseEqual(actual, expected)
|
29
|
+
|
30
|
+
def test_global_explicit_sig(self):
|
31
|
+
self.check_fib(self.mod.fib1)
|
32
|
+
|
33
|
+
def test_inner_explicit_sig(self):
|
34
|
+
self.check_fib(self.mod.fib2)
|
35
|
+
|
36
|
+
def test_global_implicit_sig(self):
|
37
|
+
self.check_fib(self.mod.fib3)
|
38
|
+
|
39
|
+
@skip_on_cudasim('Simulator does not compile')
|
40
|
+
def test_runaway(self):
|
41
|
+
with self.assertRaises(TypingError) as raises:
|
42
|
+
cfunc = self.mod.runaway_self
|
43
|
+
|
44
|
+
@cuda.jit('void()')
|
45
|
+
def kernel():
|
46
|
+
cfunc(1)
|
47
|
+
|
48
|
+
self.assertIn("cannot type infer runaway recursion",
|
49
|
+
str(raises.exception))
|
50
|
+
|
51
|
+
@unittest.skip('Needs insert_unresolved_ref support in target')
|
52
|
+
def test_type_change(self):
|
53
|
+
pfunc = self.mod.type_change_self.py_func
|
54
|
+
cfunc = self.mod.type_change_self
|
55
|
+
|
56
|
+
@cuda.jit
|
57
|
+
def kernel(r, x, y):
|
58
|
+
r[0] = cfunc(x[0], y[0])
|
59
|
+
|
60
|
+
args = 13, 0.125
|
61
|
+
x = np.asarray([args[0]], dtype=np.int64)
|
62
|
+
y = np.asarray([args[1]], dtype=np.float64)
|
63
|
+
r = np.zeros_like(x)
|
64
|
+
|
65
|
+
kernel[1, 1](r, x, y)
|
66
|
+
|
67
|
+
expected = pfunc(*args)
|
68
|
+
actual = r[0]
|
69
|
+
|
70
|
+
self.assertPreciseEqual(actual, expected)
|
71
|
+
|
72
|
+
@unittest.expectedFailure
|
73
|
+
def test_raise(self):
|
74
|
+
# This is an expected failure because reporting of exceptions raised in
|
75
|
+
# device functions does not work correctly - see Issue #8036:
|
76
|
+
# https://github.com/numba/numba/issues/8036
|
77
|
+
with self.assertRaises(ValueError) as raises:
|
78
|
+
self.mod.raise_self_kernel[1, 1](3)
|
79
|
+
|
80
|
+
self.assertEqual(str(raises.exception), "raise_self")
|
81
|
+
|
82
|
+
@unittest.skip('Needs insert_unresolved_ref support in target')
|
83
|
+
def test_optional_return(self):
|
84
|
+
pfunc = self.mod.make_optional_return_case()
|
85
|
+
cfunc = self.mod.make_optional_return_case(cuda.jit)
|
86
|
+
|
87
|
+
@cuda.jit
|
88
|
+
def kernel(r, x):
|
89
|
+
res = cfunc(x[0])
|
90
|
+
if res is None:
|
91
|
+
res = 999
|
92
|
+
r[0] = res
|
93
|
+
|
94
|
+
def cpu_kernel(x):
|
95
|
+
res = pfunc(x)
|
96
|
+
if res is None:
|
97
|
+
res = 999
|
98
|
+
return res
|
99
|
+
|
100
|
+
for arg in (0, 5, 10, 15):
|
101
|
+
expected = cpu_kernel(arg)
|
102
|
+
x = np.asarray([arg], dtype=np.int64)
|
103
|
+
r = np.zeros_like(x)
|
104
|
+
kernel[1, 1](r, x)
|
105
|
+
actual = r[0]
|
106
|
+
|
107
|
+
self.assertEqual(expected, actual)
|
108
|
+
|
109
|
+
@skip_on_cudasim('Recursion handled because simulator does not compile')
|
110
|
+
def test_growing_return_tuple(self):
|
111
|
+
cfunc = self.mod.make_growing_tuple_case(cuda.jit)
|
112
|
+
|
113
|
+
with self.assertRaises(TypingError) as raises:
|
114
|
+
@cuda.jit('void()')
|
115
|
+
def kernel():
|
116
|
+
cfunc(100)
|
117
|
+
|
118
|
+
self.assertIn(
|
119
|
+
"Return type of recursive function does not converge",
|
120
|
+
str(raises.exception),
|
121
|
+
)
|
122
|
+
|
123
|
+
|
124
|
+
if __name__ == '__main__':
|
125
|
+
unittest.main()
|
@@ -0,0 +1,76 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda
|
3
|
+
from numba.core.config import ENABLE_CUDASIM
|
4
|
+
from numba.cuda.testing import CUDATestCase
|
5
|
+
import unittest
|
6
|
+
|
7
|
+
# Avoid recompilation of the sum_reduce function by keeping it at global scope
|
8
|
+
sum_reduce = cuda.Reduce(lambda a, b: a + b)
|
9
|
+
|
10
|
+
|
11
|
+
class TestReduction(CUDATestCase):
|
12
|
+
def _sum_reduce(self, n):
|
13
|
+
A = (np.arange(n, dtype=np.float64) + 1)
|
14
|
+
expect = A.sum()
|
15
|
+
got = sum_reduce(A)
|
16
|
+
self.assertEqual(expect, got)
|
17
|
+
|
18
|
+
def test_sum_reduce(self):
|
19
|
+
if ENABLE_CUDASIM:
|
20
|
+
# Minimal test set for the simulator (which only wraps
|
21
|
+
# functools.reduce)
|
22
|
+
test_sizes = [ 1, 16 ]
|
23
|
+
else:
|
24
|
+
# Tests around the points where blocksize changes, and around larger
|
25
|
+
# powers of two, sums of powers of two, and some "random" sizes
|
26
|
+
test_sizes = [ 1, 15, 16, 17, 127, 128, 129, 1023, 1024,
|
27
|
+
1025, 1536, 1048576, 1049600, 1049728, 34567 ]
|
28
|
+
# Avoid recompilation by keeping sum_reduce here
|
29
|
+
for n in test_sizes:
|
30
|
+
self._sum_reduce(n)
|
31
|
+
|
32
|
+
def test_empty_array_host(self):
|
33
|
+
A = (np.arange(0, dtype=np.float64) + 1)
|
34
|
+
expect = A.sum()
|
35
|
+
got = sum_reduce(A)
|
36
|
+
self.assertEqual(expect, got)
|
37
|
+
|
38
|
+
def test_empty_array_device(self):
|
39
|
+
A = (np.arange(0, dtype=np.float64) + 1)
|
40
|
+
dA = cuda.to_device(A)
|
41
|
+
expect = A.sum()
|
42
|
+
got = sum_reduce(dA)
|
43
|
+
self.assertEqual(expect, got)
|
44
|
+
|
45
|
+
def test_prod_reduce(self):
|
46
|
+
prod_reduce = cuda.reduce(lambda a, b: a * b)
|
47
|
+
A = (np.arange(64, dtype=np.float64) + 1)
|
48
|
+
expect = A.prod()
|
49
|
+
got = prod_reduce(A, init=1)
|
50
|
+
np.testing.assert_allclose(expect, got)
|
51
|
+
|
52
|
+
def test_max_reduce(self):
|
53
|
+
max_reduce = cuda.Reduce(lambda a, b: max(a, b))
|
54
|
+
A = (np.arange(3717, dtype=np.float64) + 1)
|
55
|
+
expect = A.max()
|
56
|
+
got = max_reduce(A, init=0)
|
57
|
+
self.assertEqual(expect, got)
|
58
|
+
|
59
|
+
def test_non_identity_init(self):
|
60
|
+
init = 3
|
61
|
+
A = (np.arange(10, dtype=np.float64) + 1)
|
62
|
+
expect = A.sum() + init
|
63
|
+
got = sum_reduce(A, init=init)
|
64
|
+
self.assertEqual(expect, got)
|
65
|
+
|
66
|
+
def test_result_on_device(self):
|
67
|
+
A = (np.arange(10, dtype=np.float64) + 1)
|
68
|
+
got = cuda.to_device(np.zeros(1, dtype=np.float64))
|
69
|
+
expect = A.sum()
|
70
|
+
res = sum_reduce(A, res=got)
|
71
|
+
self.assertIsNone(res)
|
72
|
+
self.assertEqual(expect, got[0])
|
73
|
+
|
74
|
+
|
75
|
+
if __name__ == '__main__':
|
76
|
+
unittest.main()
|
@@ -0,0 +1,83 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from numba import cuda
|
4
|
+
from numba.cuda.args import wrap_arg
|
5
|
+
from numba.cuda.testing import CUDATestCase
|
6
|
+
import unittest
|
7
|
+
|
8
|
+
|
9
|
+
class DefaultIn(object):
|
10
|
+
def prepare_args(self, ty, val, **kwargs):
|
11
|
+
return ty, wrap_arg(val, default=cuda.In)
|
12
|
+
|
13
|
+
|
14
|
+
def nocopy(kernel):
|
15
|
+
kernel.extensions.append(DefaultIn())
|
16
|
+
return kernel
|
17
|
+
|
18
|
+
|
19
|
+
def set_array_to_three(arr):
|
20
|
+
arr[0] = 3
|
21
|
+
|
22
|
+
|
23
|
+
def set_record_to_three(rec):
|
24
|
+
rec[0]['b'] = 3
|
25
|
+
|
26
|
+
|
27
|
+
recordtype = np.dtype(
|
28
|
+
[('b', np.int32)],
|
29
|
+
align=True
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
class TestRetrieveAutoconvertedArrays(CUDATestCase):
|
34
|
+
def setUp(self):
|
35
|
+
super().setUp()
|
36
|
+
self.set_array_to_three = cuda.jit(set_array_to_three)
|
37
|
+
self.set_array_to_three_nocopy = nocopy(cuda.jit(set_array_to_three))
|
38
|
+
self.set_record_to_three = cuda.jit(set_record_to_three)
|
39
|
+
self.set_record_to_three_nocopy = nocopy(cuda.jit(set_record_to_three))
|
40
|
+
|
41
|
+
def test_array_inout(self):
|
42
|
+
host_arr = np.zeros(1, dtype=np.int64)
|
43
|
+
self.set_array_to_three[1, 1](cuda.InOut(host_arr))
|
44
|
+
self.assertEqual(3, host_arr[0])
|
45
|
+
|
46
|
+
def test_array_in(self):
|
47
|
+
host_arr = np.zeros(1, dtype=np.int64)
|
48
|
+
self.set_array_to_three[1, 1](cuda.In(host_arr))
|
49
|
+
self.assertEqual(0, host_arr[0])
|
50
|
+
|
51
|
+
def test_array_in_from_config(self):
|
52
|
+
host_arr = np.zeros(1, dtype=np.int64)
|
53
|
+
self.set_array_to_three_nocopy[1, 1](host_arr)
|
54
|
+
self.assertEqual(0, host_arr[0])
|
55
|
+
|
56
|
+
def test_array_default(self):
|
57
|
+
host_arr = np.zeros(1, dtype=np.int64)
|
58
|
+
self.set_array_to_three[1, 1](host_arr)
|
59
|
+
self.assertEqual(3, host_arr[0])
|
60
|
+
|
61
|
+
def test_record_in(self):
|
62
|
+
host_rec = np.zeros(1, dtype=recordtype)
|
63
|
+
self.set_record_to_three[1, 1](cuda.In(host_rec))
|
64
|
+
self.assertEqual(0, host_rec[0]['b'])
|
65
|
+
|
66
|
+
def test_record_inout(self):
|
67
|
+
host_rec = np.zeros(1, dtype=recordtype)
|
68
|
+
self.set_record_to_three[1, 1](cuda.InOut(host_rec))
|
69
|
+
self.assertEqual(3, host_rec[0]['b'])
|
70
|
+
|
71
|
+
def test_record_default(self):
|
72
|
+
host_rec = np.zeros(1, dtype=recordtype)
|
73
|
+
self.set_record_to_three[1, 1](host_rec)
|
74
|
+
self.assertEqual(3, host_rec[0]['b'])
|
75
|
+
|
76
|
+
def test_record_in_from_config(self):
|
77
|
+
host_rec = np.zeros(1, dtype=recordtype)
|
78
|
+
self.set_record_to_three_nocopy[1, 1](host_rec)
|
79
|
+
self.assertEqual(0, host_rec[0]['b'])
|
80
|
+
|
81
|
+
|
82
|
+
if __name__ == '__main__':
|
83
|
+
unittest.main()
|
@@ -0,0 +1,85 @@
|
|
1
|
+
import pickle
|
2
|
+
import numpy as np
|
3
|
+
from numba import cuda, vectorize
|
4
|
+
from numba.core import types
|
5
|
+
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
6
|
+
import unittest
|
7
|
+
from numba.np import numpy_support
|
8
|
+
|
9
|
+
|
10
|
+
@skip_on_cudasim('pickling not supported in CUDASIM')
|
11
|
+
class TestPickle(CUDATestCase):
|
12
|
+
|
13
|
+
def check_call(self, callee):
|
14
|
+
arr = np.array([100])
|
15
|
+
expected = callee[1, 1](arr)
|
16
|
+
|
17
|
+
# serialize and rebuild
|
18
|
+
foo1 = pickle.loads(pickle.dumps(callee))
|
19
|
+
del callee
|
20
|
+
# call rebuild function
|
21
|
+
got1 = foo1[1, 1](arr)
|
22
|
+
np.testing.assert_equal(got1, expected)
|
23
|
+
del got1
|
24
|
+
|
25
|
+
# test serialization of previously serialized object
|
26
|
+
foo2 = pickle.loads(pickle.dumps(foo1))
|
27
|
+
del foo1
|
28
|
+
# call rebuild function
|
29
|
+
got2 = foo2[1, 1](arr)
|
30
|
+
np.testing.assert_equal(got2, expected)
|
31
|
+
del got2
|
32
|
+
|
33
|
+
# test propagation of thread, block config
|
34
|
+
foo3 = pickle.loads(pickle.dumps(foo2[5, 8]))
|
35
|
+
del foo2
|
36
|
+
self.assertEqual(foo3.griddim, (5, 1, 1))
|
37
|
+
self.assertEqual(foo3.blockdim, (8, 1, 1))
|
38
|
+
|
39
|
+
def test_pickling_jit_typing(self):
|
40
|
+
@cuda.jit(device=True)
|
41
|
+
def inner(a):
|
42
|
+
return a + 1
|
43
|
+
|
44
|
+
@cuda.jit('void(intp[:])')
|
45
|
+
def foo(arr):
|
46
|
+
arr[0] = inner(arr[0])
|
47
|
+
|
48
|
+
self.check_call(foo)
|
49
|
+
|
50
|
+
def test_pickling_jit(self):
|
51
|
+
|
52
|
+
@cuda.jit(device=True)
|
53
|
+
def inner(a):
|
54
|
+
return a + 1
|
55
|
+
|
56
|
+
@cuda.jit
|
57
|
+
def foo(arr):
|
58
|
+
arr[0] = inner(arr[0])
|
59
|
+
|
60
|
+
self.check_call(foo)
|
61
|
+
|
62
|
+
def test_pickling_vectorize(self):
|
63
|
+
@vectorize(['intp(intp)', 'float64(float64)'], target='cuda')
|
64
|
+
def cuda_vect(x):
|
65
|
+
return x * 2
|
66
|
+
|
67
|
+
# accommodate int representations in np.arange
|
68
|
+
npty = numpy_support.as_dtype(types.intp)
|
69
|
+
# get expected result
|
70
|
+
ary = np.arange(10, dtype=npty)
|
71
|
+
expected = cuda_vect(ary)
|
72
|
+
# first pickle
|
73
|
+
foo1 = pickle.loads(pickle.dumps(cuda_vect))
|
74
|
+
del cuda_vect
|
75
|
+
got1 = foo1(ary)
|
76
|
+
np.testing.assert_equal(expected, got1)
|
77
|
+
# second pickle
|
78
|
+
foo2 = pickle.loads(pickle.dumps(foo1))
|
79
|
+
del foo1
|
80
|
+
got2 = foo2(ary)
|
81
|
+
np.testing.assert_equal(expected, got2)
|
82
|
+
|
83
|
+
|
84
|
+
if __name__ == '__main__':
|
85
|
+
unittest.main()
|
@@ -0,0 +1,37 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
|
5
|
+
|
6
|
+
def foo(inp, out):
|
7
|
+
for i in range(out.shape[0]):
|
8
|
+
out[i] = inp[i]
|
9
|
+
|
10
|
+
|
11
|
+
def copy(inp, out):
|
12
|
+
i = cuda.grid(1)
|
13
|
+
cufoo(inp[i, :], out[i, :])
|
14
|
+
|
15
|
+
|
16
|
+
class TestCudaSlicing(CUDATestCase):
|
17
|
+
def test_slice_as_arg(self):
|
18
|
+
global cufoo
|
19
|
+
cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo)
|
20
|
+
cucopy = cuda.jit("void(int32[:,:], int32[:,:])")(copy)
|
21
|
+
|
22
|
+
inp = np.arange(100, dtype=np.int32).reshape(10, 10)
|
23
|
+
out = np.zeros_like(inp)
|
24
|
+
|
25
|
+
cucopy[1, 10](inp, out)
|
26
|
+
|
27
|
+
def test_assign_empty_slice(self):
|
28
|
+
# Issue #5017. Assigning to an empty slice should not result in a
|
29
|
+
# CudaAPIError.
|
30
|
+
N = 0
|
31
|
+
a = range(N)
|
32
|
+
arr = cuda.device_array(len(a))
|
33
|
+
arr[:] = cuda.to_device(a)
|
34
|
+
|
35
|
+
|
36
|
+
if __name__ == '__main__':
|
37
|
+
unittest.main()
|