numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,234 @@
|
|
1
|
+
from numba import cuda
|
2
|
+
from numba.cuda.testing import CUDATestCase
|
3
|
+
import numpy as np
|
4
|
+
import sys
|
5
|
+
|
6
|
+
|
7
|
+
class UseCase:
    """
    Provide a way to call a kernel as if it were a function.

    This allows the CUDA cache tests to closely match the CPU cache tests, and
    also to support calling cache use cases as njitted functions. The class
    wraps a function that takes an array for the return value and arguments,
    and provides an interface that accepts arguments, launches the kernel
    appropriately, and returns the stored return value.

    The return type is inferred from the type of the first argument, unless it
    is explicitly overridden by the ``retty`` kwarg.

    Subclasses supply the launch strategy by implementing ``_call``.
    """
    def __init__(self, func, retty=None):
        """
        Store the wrapped function and the optional return dtype.

        :param func: the function invoked through ``_call``.
        :param retty: dtype of the 0-d return array, or None to use the dtype
                      and shape of the first call argument.
        """
        self._func = func
        self._retty = retty

    def __call__(self, *args):
        """
        Convert the arguments to arrays, run the wrapped function and return
        the value it stored in the return array.

        Raises IndexError if no ``retty`` was given and no arguments are
        passed, because there is then nothing to infer the return type from.
        """
        array_args = [np.asarray(arg) for arg in args]
        # Compare against None explicitly rather than relying on truthiness:
        # an unstructured np.dtype instance has len() == 0 (its field count)
        # and can therefore evaluate as falsy, which would silently discard
        # an explicitly requested return type.
        if self._retty is not None:
            array_return = np.ndarray((), dtype=self._retty)
        else:
            array_return = np.zeros_like(array_args[0])

        self._call(array_return, *array_args)
        # Indexing a 0-d array with an empty tuple extracts its single item.
        return array_return[()]

    def _call(self, ret, *args):
        """
        Run the wrapped function, writing its result into ``ret``.

        Must be overridden by subclasses.
        """
        raise NotImplementedError

    @property
    def func(self):
        """The wrapped function, as passed to the constructor."""
        return self._func
|
37
|
+
|
38
|
+
|
39
|
+
class CUDAUseCase(UseCase):
    """Use case whose wrapped function is launched as a kernel."""

    def _call(self, ret, *args):
        # Select the [1, 1] launch configuration first, then invoke the
        # configured kernel with the return array followed by the arguments.
        configured = self._func[1, 1]
        configured(ret, *args)
|
42
|
+
|
43
|
+
|
44
|
+
@cuda.jit(cache=True)
|
45
|
+
def add_usecase_kernel(r, x, y):
|
46
|
+
r[()] = x[()] + y[()] + Z
|
47
|
+
|
48
|
+
|
49
|
+
@cuda.jit(cache=False)
|
50
|
+
def add_nocache_usecase_kernel(r, x, y):
|
51
|
+
r[()] = x[()] + y[()] + Z
|
52
|
+
|
53
|
+
|
54
|
+
add_usecase = CUDAUseCase(add_usecase_kernel)
|
55
|
+
add_nocache_usecase = CUDAUseCase(add_nocache_usecase_kernel)
|
56
|
+
|
57
|
+
Z = 1
|
58
|
+
|
59
|
+
|
60
|
+
# Inner / outer cached / uncached cases
|
61
|
+
|
62
|
+
@cuda.jit(cache=True)
|
63
|
+
def inner(x, y):
|
64
|
+
return x + y + Z
|
65
|
+
|
66
|
+
|
67
|
+
@cuda.jit(cache=True)
|
68
|
+
def outer_kernel(r, x, y):
|
69
|
+
r[()] = inner(-y[()], x[()])
|
70
|
+
|
71
|
+
|
72
|
+
@cuda.jit(cache=False)
|
73
|
+
def outer_uncached_kernel(r, x, y):
|
74
|
+
r[()] = inner(-y[()], x[()])
|
75
|
+
|
76
|
+
|
77
|
+
outer = CUDAUseCase(outer_kernel)
|
78
|
+
outer_uncached = CUDAUseCase(outer_uncached_kernel)
|
79
|
+
|
80
|
+
|
81
|
+
# Exercise returning a record instance. This used to hardcode the dtype
|
82
|
+
# pointer's value in the bitcode.
|
83
|
+
|
84
|
+
packed_record_type = np.dtype([('a', np.int8), ('b', np.float64)])
|
85
|
+
aligned_record_type = np.dtype([('a', np.int8), ('b', np.float64)], align=True)
|
86
|
+
|
87
|
+
packed_arr = np.empty(2, dtype=packed_record_type)
|
88
|
+
for i in range(packed_arr.size):
|
89
|
+
packed_arr[i]['a'] = i + 1
|
90
|
+
packed_arr[i]['b'] = i + 42.5
|
91
|
+
|
92
|
+
aligned_arr = np.array(packed_arr, dtype=aligned_record_type)
|
93
|
+
|
94
|
+
|
95
|
+
@cuda.jit(cache=True)
|
96
|
+
def record_return(r, ary, i):
|
97
|
+
r[()] = ary[i]
|
98
|
+
|
99
|
+
|
100
|
+
record_return_packed = CUDAUseCase(record_return, retty=packed_record_type)
|
101
|
+
record_return_aligned = CUDAUseCase(record_return, retty=aligned_record_type)
|
102
|
+
|
103
|
+
|
104
|
+
# Closure test cases
|
105
|
+
|
106
|
+
def make_closure(x):
|
107
|
+
@cuda.jit(cache=True)
|
108
|
+
def closure(r, y):
|
109
|
+
r[()] = x + y[()]
|
110
|
+
|
111
|
+
return CUDAUseCase(closure)
|
112
|
+
|
113
|
+
|
114
|
+
closure1 = make_closure(3)
|
115
|
+
closure2 = make_closure(5)
|
116
|
+
closure3 = make_closure(7)
|
117
|
+
closure4 = make_closure(9)
|
118
|
+
|
119
|
+
|
120
|
+
# Ambiguous / renamed functions
|
121
|
+
|
122
|
+
@cuda.jit(cache=True)
|
123
|
+
def ambiguous_function(r, x):
|
124
|
+
r[()] = x[()] + 2
|
125
|
+
|
126
|
+
|
127
|
+
renamed_function1 = CUDAUseCase(ambiguous_function)
|
128
|
+
|
129
|
+
|
130
|
+
@cuda.jit(cache=True)
|
131
|
+
def ambiguous_function(r, x):
|
132
|
+
r[()] = x[()] + 6
|
133
|
+
|
134
|
+
|
135
|
+
renamed_function2 = CUDAUseCase(ambiguous_function)
|
136
|
+
|
137
|
+
|
138
|
+
@cuda.jit(cache=True)
|
139
|
+
def many_locals():
|
140
|
+
aa = cuda.local.array((1, 1), np.float64)
|
141
|
+
ab = cuda.local.array((1, 1), np.float64)
|
142
|
+
ac = cuda.local.array((1, 1), np.float64)
|
143
|
+
ad = cuda.local.array((1, 1), np.float64)
|
144
|
+
ae = cuda.local.array((1, 1), np.float64)
|
145
|
+
af = cuda.local.array((1, 1), np.float64)
|
146
|
+
ag = cuda.local.array((1, 1), np.float64)
|
147
|
+
ah = cuda.local.array((1, 1), np.float64)
|
148
|
+
ai = cuda.local.array((1, 1), np.float64)
|
149
|
+
aj = cuda.local.array((1, 1), np.float64)
|
150
|
+
ak = cuda.local.array((1, 1), np.float64)
|
151
|
+
al = cuda.local.array((1, 1), np.float64)
|
152
|
+
am = cuda.local.array((1, 1), np.float64)
|
153
|
+
an = cuda.local.array((1, 1), np.float64)
|
154
|
+
ao = cuda.local.array((1, 1), np.float64)
|
155
|
+
ap = cuda.local.array((1, 1), np.float64)
|
156
|
+
ar = cuda.local.array((1, 1), np.float64)
|
157
|
+
at = cuda.local.array((1, 1), np.float64)
|
158
|
+
au = cuda.local.array((1, 1), np.float64)
|
159
|
+
av = cuda.local.array((1, 1), np.float64)
|
160
|
+
aw = cuda.local.array((1, 1), np.float64)
|
161
|
+
ax = cuda.local.array((1, 1), np.float64)
|
162
|
+
ay = cuda.local.array((1, 1), np.float64)
|
163
|
+
az = cuda.local.array((1, 1), np.float64)
|
164
|
+
|
165
|
+
aa[:] = 0
|
166
|
+
ab[:] = 0
|
167
|
+
ac[:] = 0
|
168
|
+
ad[:] = 0
|
169
|
+
ae[:] = 0
|
170
|
+
af[:] = 0
|
171
|
+
ag[:] = 0
|
172
|
+
ah[:] = 0
|
173
|
+
ai[:] = 0
|
174
|
+
aj[:] = 0
|
175
|
+
ak[:] = 0
|
176
|
+
al[:] = 0
|
177
|
+
am[:] = 0
|
178
|
+
an[:] = 0
|
179
|
+
ao[:] = 0
|
180
|
+
ap[:] = 0
|
181
|
+
ar[:] = 0
|
182
|
+
at[:] = 0
|
183
|
+
au[:] = 0
|
184
|
+
av[:] = 0
|
185
|
+
aw[:] = 0
|
186
|
+
ax[:] = 0
|
187
|
+
ay[:] = 0
|
188
|
+
az[:] = 0
|
189
|
+
|
190
|
+
|
191
|
+
# Simple use case for multiprocessing test
|
192
|
+
|
193
|
+
@cuda.jit(cache=True)
|
194
|
+
def simple_usecase_kernel(r, x):
|
195
|
+
r[()] = x[()]
|
196
|
+
|
197
|
+
|
198
|
+
simple_usecase_caller = CUDAUseCase(simple_usecase_kernel)
|
199
|
+
|
200
|
+
|
201
|
+
# Usecase with cooperative groups
|
202
|
+
|
203
|
+
@cuda.jit(cache=True)
|
204
|
+
def cg_usecase_kernel(r, x):
|
205
|
+
grid = cuda.cg.this_grid()
|
206
|
+
grid.sync()
|
207
|
+
|
208
|
+
|
209
|
+
cg_usecase = CUDAUseCase(cg_usecase_kernel)
|
210
|
+
|
211
|
+
|
212
|
+
class _TestModule(CUDATestCase):
    """
    Functional checks for the use cases defined in this module.

    No "test_*" methods are defined here, so nothing is collected
    automatically; call check_module() by hand with the module to verify.
    """

    def check_module(self, mod):
        expect = self.assertPreciseEqual

        expect(mod.add_usecase(2, 3), 6)
        expect(mod.outer_uncached(3, 2), 2)
        expect(mod.outer(3, 2), 2)

        # Element 1 of both record arrays holds a == 2 and b == 43.5.
        from_packed = mod.record_return_packed(mod.packed_arr, 1)
        expect(tuple(from_packed), (2, 43.5))
        from_aligned = mod.record_return_aligned(mod.aligned_arr, 1)
        expect(tuple(from_aligned), (2, 43.5))

        # Only needs to run without raising; the result is not inspected.
        mod.simple_usecase_caller(2)
|
230
|
+
|
231
|
+
|
232
|
+
def self_test():
    """Run check_module() against the module this function lives in."""
    this_module = sys.modules[__name__]
    checker = _TestModule()
    checker.check_module(this_module)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
import sys
|
2
|
+
|
3
|
+
from numba import cuda, njit
|
4
|
+
from numba.cuda.testing import CUDATestCase
|
5
|
+
from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase, UseCase
|
6
|
+
|
7
|
+
|
8
|
+
class CPUUseCase(UseCase):
    """Use case whose wrapped function is called directly, with no launch
    configuration."""

    def _call(self, ret, *args):
        target = self._func
        target(ret, *args)
|
11
|
+
|
12
|
+
|
13
|
+
# Using the same function as a cached CPU and CUDA-jitted function
|
14
|
+
|
15
|
+
def target_shared_assign(r, x):
|
16
|
+
r[()] = x[()]
|
17
|
+
|
18
|
+
|
19
|
+
assign_cuda_kernel = cuda.jit(cache=True)(target_shared_assign)
|
20
|
+
assign_cuda = CUDAUseCase(assign_cuda_kernel)
|
21
|
+
assign_cpu_jitted = njit(cache=True)(target_shared_assign)
|
22
|
+
assign_cpu = CPUUseCase(assign_cpu_jitted)
|
23
|
+
|
24
|
+
|
25
|
+
class _TestModule(CUDATestCase):
    """
    Functional checks for the use cases defined in this module.

    No "test_*" methods are defined here, so nothing is collected
    automatically; call check_module() by hand with the module to verify.
    """

    def check_module(self, mod):
        # CPU use case first, then CUDA; each must hand back its input
        # unchanged for both an integer and a float.
        for attr in ("assign_cpu", "assign_cuda"):
            assign = getattr(mod, attr)
            for value in (5, 5.5):
                self.assertPreciseEqual(assign(value), value)
|
37
|
+
|
38
|
+
|
39
|
+
def self_test():
    """Run check_module() against the module this function lives in."""
    this_module = sys.modules[__name__]
    checker = _TestModule()
    checker.check_module(this_module)
|
@@ -0,0 +1,58 @@
|
|
1
|
+
from numba import types
|
2
|
+
from numba.core import config
|
3
|
+
|
4
|
+
|
5
|
+
class TestStruct:
    """Plain Python object carrying two attributes, ``x`` and ``y``."""

    def __init__(self, x, y):
        self.x, self.y = x, y
|
9
|
+
|
10
|
+
|
11
|
+
class TestStructModelType(types.Type):
    """Type class, parameterless and registered under a fixed name, that
    this module uses to describe ``TestStruct`` values."""

    def __init__(self):
        super(TestStructModelType, self).__init__(name="TestStructModelType")
|
14
|
+
|
15
|
+
|
16
|
+
test_struct_model_type = TestStructModelType()
|
17
|
+
|
18
|
+
|
19
|
+
if not config.ENABLE_CUDASIM:
|
20
|
+
from numba import int32
|
21
|
+
from numba.core.extending import (
|
22
|
+
models,
|
23
|
+
register_model,
|
24
|
+
make_attribute_wrapper,
|
25
|
+
typeof_impl,
|
26
|
+
type_callable
|
27
|
+
)
|
28
|
+
from numba.cuda.cudaimpl import lower
|
29
|
+
from numba.core import cgutils
|
30
|
+
|
31
|
+
@typeof_impl.register(TestStruct)
|
32
|
+
def typeof_teststruct(val, c):
|
33
|
+
return test_struct_model_type
|
34
|
+
|
35
|
+
@register_model(TestStructModelType)
|
36
|
+
class TestStructModel(models.StructModel):
|
37
|
+
def __init__(self, dmm, fe_type):
|
38
|
+
members = [("x", int32), ("y", int32)]
|
39
|
+
super().__init__(dmm, fe_type, members)
|
40
|
+
|
41
|
+
make_attribute_wrapper(TestStructModelType, 'x', 'x')
|
42
|
+
make_attribute_wrapper(TestStructModelType, 'y', 'y')
|
43
|
+
|
44
|
+
@type_callable(TestStruct)
|
45
|
+
def type_test_struct(context):
|
46
|
+
def typer(x, y):
|
47
|
+
if isinstance(x, types.Integer) and isinstance(y, types.Integer):
|
48
|
+
return test_struct_model_type
|
49
|
+
return typer
|
50
|
+
|
51
|
+
@lower(TestStruct, types.Integer, types.Integer)
|
52
|
+
def lower_test_type_ctor(context, builder, sig, args):
|
53
|
+
obj = cgutils.create_struct_proxy(
|
54
|
+
test_struct_model_type
|
55
|
+
)(context, builder)
|
56
|
+
obj.x = args[0]
|
57
|
+
obj.y = args[1]
|
58
|
+
return obj._getvalue()
|
@@ -0,0 +1,30 @@
|
|
1
|
+
//
|
2
|
+
// Generated by NVIDIA NVVM Compiler
|
3
|
+
// Compiler built on Tue Apr 1 03:34:02 2014 (1396341242)
|
4
|
+
// Cuda compilation tools, release 6.0, V6.0.1
|
5
|
+
//
|
6
|
+
|
7
|
+
.version 4.0
|
8
|
+
.target sm_20
|
9
|
+
.address_size 64
|
10
|
+
|
11
|
+
|
12
|
+
.visible .func (.param .b32 func_retval0) bar(
|
13
|
+
.param .b64 bar_param_0,
|
14
|
+
.param .b32 bar_param_1
|
15
|
+
)
|
16
|
+
{
|
17
|
+
.reg .s32 %r<4>;
|
18
|
+
.reg .s64 %rd<2>;
|
19
|
+
|
20
|
+
|
21
|
+
ld.param.u64 %rd1, [bar_param_0];
|
22
|
+
ld.param.u32 %r1, [bar_param_1];
|
23
|
+
shl.b32 %r2, %r1, 1;
|
24
|
+
st.u32 [%rd1], %r2;
|
25
|
+
mov.u32 %r3, 0;
|
26
|
+
st.param.b32 [func_retval0+0], %r3;
|
27
|
+
ret;
|
28
|
+
}
|
29
|
+
|
30
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
"""
|
2
|
+
Usecases of recursive functions in the CUDA target, many derived from
|
3
|
+
numba/tests/recursion_usecases.py.
|
4
|
+
|
5
|
+
Some functions are compiled at import time, hence a separate module.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from numba import cuda
|
9
|
+
|
10
|
+
|
11
|
+
@cuda.jit("i8(i8)", device=True)
|
12
|
+
def fib1(n):
|
13
|
+
if n < 2:
|
14
|
+
return n
|
15
|
+
# Note the second call does not use a named argument, unlike the CPU target
|
16
|
+
# usecase
|
17
|
+
return fib1(n - 1) + fib1(n - 2)
|
18
|
+
|
19
|
+
|
20
|
+
def make_fib2():
|
21
|
+
@cuda.jit("i8(i8)", device=True)
|
22
|
+
def fib2(n):
|
23
|
+
if n < 2:
|
24
|
+
return n
|
25
|
+
return fib2(n - 1) + fib2(n - 2)
|
26
|
+
|
27
|
+
return fib2
|
28
|
+
|
29
|
+
|
30
|
+
fib2 = make_fib2()
|
31
|
+
|
32
|
+
|
33
|
+
@cuda.jit
|
34
|
+
def type_change_self(x, y):
|
35
|
+
if x > 1 and y > 0:
|
36
|
+
return x + type_change_self(x - y, y)
|
37
|
+
else:
|
38
|
+
return y
|
39
|
+
|
40
|
+
|
41
|
+
# Implicit signature
|
42
|
+
@cuda.jit(device=True)
|
43
|
+
def fib3(n):
|
44
|
+
if n < 2:
|
45
|
+
return n
|
46
|
+
|
47
|
+
return fib3(n - 1) + fib3(n - 2)
|
48
|
+
|
49
|
+
|
50
|
+
# Run-away self recursion
|
51
|
+
@cuda.jit(device=True)
|
52
|
+
def runaway_self(x):
|
53
|
+
return runaway_self(x)
|
54
|
+
|
55
|
+
|
56
|
+
@cuda.jit(device=True)
def raise_self(x):
    # Self-recursive use case: counts x down by one per call and raises once
    # it reaches exactly 1; non-positive input returns 1 without raising.
    if x == 1:
        raise ValueError("raise_self")
    if x > 0:
        return raise_self(x - 1)
    return 1
|
64
|
+
|
65
|
+
|
66
|
+
@cuda.jit(debug=True, opt=False)
|
67
|
+
def raise_self_kernel(x):
|
68
|
+
raise_self(x)
|
69
|
+
|
70
|
+
|
71
|
+
def make_optional_return_case(jit=lambda x: x):
    """
    Build a recursive function whose helper may return None.

    ``jit`` is applied as a decorator to both inner functions; the default
    leaves them as plain Python functions. The returned function is ``bar``.
    """
    @jit
    def foo(x):
        # Either x - 1 or None, depending on the threshold.
        if x > 5:
            return x - 1
        return None

    @jit
    def bar(x):
        out = foo(x)
        # The None check and the bound check stay separate branches, as in
        # the original use case.
        if out is None:
            return out
        if out < 8:
            return out
        return x * bar(out)

    return bar
|
90
|
+
|
91
|
+
|
92
|
+
def make_growing_tuple_case(jit=lambda x: x):
|
93
|
+
# From issue #4387
|
94
|
+
@jit
|
95
|
+
def make_list(n):
|
96
|
+
if n <= 0:
|
97
|
+
return None
|
98
|
+
|
99
|
+
return (n, make_list(n - 1))
|
100
|
+
return make_list
|
@@ -0,0 +1,42 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import from_dtype, cuda
|
3
|
+
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
4
|
+
import unittest
|
5
|
+
|
6
|
+
|
7
|
+
class TestAlignment(CUDATestCase):
    """Checks how ``cuda.jit`` treats aligned and packed record dtypes."""

    def test_record_alignment(self):
        """A kernel compiled for an aligned record dtype copies ``b`` into
        ``a`` for every element."""
        rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')], align=True)
        rec = from_dtype(rec_dtype)

        @cuda.jit((rec[:],))
        def foo(a):
            i = cuda.grid(1)
            a[i].a = a[i].b

        a_recarray = np.recarray(3, dtype=rec_dtype)
        for i in range(a_recarray.size):
            a_rec = a_recarray[i]
            a_rec.a = 0
            a_rec.b = (i + 1) * 123

        # Launch with three threads, matching the three records.
        foo[1, 3](a_recarray)

        # Element-wise comparison that reports the mismatching values on
        # failure instead of a bare "False is not true".
        np.testing.assert_array_equal(a_recarray.a, a_recarray.b)

    @skip_on_cudasim('Simulator does not check alignment')
    def test_record_alignment_error(self):
        """Compiling for the same fields without ``align=True`` raises, and
        the message names the misaligned float64."""
        rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')])
        rec = from_dtype(rec_dtype)

        with self.assertRaises(Exception) as raises:
            @cuda.jit((rec[:],))
            def foo(a):
                i = cuda.grid(1)
                a[i].a = a[i].b

        # assertIn shows the actual exception text when the check fails.
        self.assertIn('type float64 is not aligned', str(raises.exception))
|
39
|
+
|
40
|
+
|
41
|
+
if __name__ == '__main__':
|
42
|
+
unittest.main()
|