numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.1.dist-info/METADATA +0 -10
- numba_cuda-0.0.1.dist-info/RECORD +0 -5
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,335 @@
|
|
1
|
+
from numba import cuda, njit, types, version_info
|
2
|
+
from numba.core.errors import TypingError
|
3
|
+
from numba.core.extending import overload, overload_attribute
|
4
|
+
from numba.core.typing.typeof import typeof
|
5
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim, unittest
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
|
9
|
+
# Dummy function definitions to overload
|
10
|
+
|
11
|
+
def generic_func_1():
    """Stub that exists only to be overloaded for the 'generic' target."""
|
13
|
+
|
14
|
+
|
15
|
+
def cuda_func_1():
    """Stub that exists only to be overloaded for the 'cuda' target."""
|
17
|
+
|
18
|
+
|
19
|
+
def generic_func_2():
    """Second stub overloaded for the 'generic' target."""
|
21
|
+
|
22
|
+
|
23
|
+
def cuda_func_2():
    """Second stub overloaded for the 'cuda' target."""
|
25
|
+
|
26
|
+
|
27
|
+
def generic_calls_generic():
    """Stub whose 'generic' overload calls another generic overload."""
|
29
|
+
|
30
|
+
|
31
|
+
def generic_calls_cuda():
    """Stub whose 'generic' overload calls a CUDA-only overload."""
|
33
|
+
|
34
|
+
|
35
|
+
def cuda_calls_generic():
    """Stub whose 'cuda' overload calls a generic overload."""
|
37
|
+
|
38
|
+
|
39
|
+
def cuda_calls_cuda():
    """Stub whose 'cuda' overload calls another CUDA-only overload."""
|
41
|
+
|
42
|
+
|
43
|
+
def target_overloaded():
    """Stub given separate overloads for the 'generic' and 'cuda' targets."""
|
45
|
+
|
46
|
+
|
47
|
+
def generic_calls_target_overloaded():
    """Stub whose 'generic' overload calls ``target_overloaded``."""
|
49
|
+
|
50
|
+
|
51
|
+
def cuda_calls_target_overloaded():
    """Stub whose 'cuda' overload calls ``target_overloaded``."""
|
53
|
+
|
54
|
+
|
55
|
+
def target_overloaded_calls_target_overloaded():
    """Stub overloaded per target; each overload calls ``target_overloaded``."""
|
57
|
+
|
58
|
+
|
59
|
+
# To recognise which functions are resolved for a call, we identify each with a
|
60
|
+
# prime number. Each function called multiplies a value by its prime (starting
|
61
|
+
# with the value 1), and we can check that the result is as expected based on
|
62
|
+
# the final value after all multiplications.
|
63
|
+
|
64
|
+
# Tags for the plain single-target overloads.
GENERIC_FUNCTION_1 = 2
CUDA_FUNCTION_1 = 3
GENERIC_FUNCTION_2 = 5
CUDA_FUNCTION_2 = 7

# Tags for overloads that call another single-target overload.
GENERIC_CALLS_GENERIC = 11
GENERIC_CALLS_CUDA = 13
CUDA_CALLS_GENERIC = 17
CUDA_CALLS_CUDA = 19

# Tags for the function that has one overload per target.
GENERIC_TARGET_OL = 23
CUDA_TARGET_OL = 29

# Tags for overloads that call the per-target overloaded function.
GENERIC_CALLS_TARGET_OL = 31
CUDA_CALLS_TARGET_OL = 37
GENERIC_TARGET_OL_CALLS_TARGET_OL = 41
CUDA_TARGET_OL_CALLS_TARGET_OL = 43
|
78
|
+
|
79
|
+
|
80
|
+
# Overload implementations
|
81
|
+
|
82
|
+
@overload(generic_func_1, target='generic')
def ol_generic_func_1(x):
    """Generic-target overload: tag ``x[0]`` with GENERIC_FUNCTION_1."""
    def tag_result(x):
        x[0] *= GENERIC_FUNCTION_1

    return tag_result
|
87
|
+
|
88
|
+
|
89
|
+
@overload(cuda_func_1, target='cuda')
def ol_cuda_func_1(x):
    """CUDA-target overload: tag ``x[0]`` with CUDA_FUNCTION_1."""
    def tag_result(x):
        x[0] *= CUDA_FUNCTION_1

    return tag_result
|
94
|
+
|
95
|
+
|
96
|
+
@overload(generic_func_2, target='generic')
def ol_generic_func_2(x):
    """Generic-target overload: tag ``x[0]`` with GENERIC_FUNCTION_2."""
    def tag_result(x):
        x[0] *= GENERIC_FUNCTION_2

    return tag_result
|
101
|
+
|
102
|
+
|
103
|
+
@overload(cuda_func_2, target='cuda')
def ol_cuda_func(x):
    """CUDA-target overload of ``cuda_func_2``: tag with CUDA_FUNCTION_2."""
    def tag_result(x):
        x[0] *= CUDA_FUNCTION_2

    return tag_result
|
108
|
+
|
109
|
+
|
110
|
+
@overload(generic_calls_generic, target='generic')
def ol_generic_calls_generic(x):
    """Generic overload: tag ``x[0]``, then call ``generic_func_1``."""
    def tag_then_call(x):
        x[0] *= GENERIC_CALLS_GENERIC
        generic_func_1(x)

    return tag_then_call
|
116
|
+
|
117
|
+
|
118
|
+
@overload(generic_calls_cuda, target='generic')
def ol_generic_calls_cuda(x):
    """Generic overload: tag ``x[0]``, then call ``cuda_func_1``."""
    def tag_then_call(x):
        x[0] *= GENERIC_CALLS_CUDA
        cuda_func_1(x)

    return tag_then_call
|
124
|
+
|
125
|
+
|
126
|
+
@overload(cuda_calls_generic, target='cuda')
def ol_cuda_calls_generic(x):
    """CUDA overload: tag ``x[0]``, then call ``generic_func_1``."""
    def tag_then_call(x):
        x[0] *= CUDA_CALLS_GENERIC
        generic_func_1(x)

    return tag_then_call
|
132
|
+
|
133
|
+
|
134
|
+
@overload(cuda_calls_cuda, target='cuda')
def ol_cuda_calls_cuda(x):
    """CUDA overload: tag ``x[0]``, then call ``cuda_func_1``."""
    def tag_then_call(x):
        x[0] *= CUDA_CALLS_CUDA
        cuda_func_1(x)

    return tag_then_call
|
140
|
+
|
141
|
+
|
142
|
+
@overload(target_overloaded, target='generic')
def ol_target_overloaded_generic(x):
    """Generic-target variant of ``target_overloaded``."""
    def tag_result(x):
        x[0] *= GENERIC_TARGET_OL

    return tag_result
|
147
|
+
|
148
|
+
|
149
|
+
@overload(target_overloaded, target='cuda')
def ol_target_overloaded_cuda(x):
    """CUDA-target variant of ``target_overloaded``."""
    def tag_result(x):
        x[0] *= CUDA_TARGET_OL

    return tag_result
|
154
|
+
|
155
|
+
|
156
|
+
@overload(generic_calls_target_overloaded, target='generic')
def ol_generic_calls_target_overloaded(x):
    """Generic overload: tag ``x[0]``, then call ``target_overloaded``."""
    def tag_then_call(x):
        x[0] *= GENERIC_CALLS_TARGET_OL
        target_overloaded(x)

    return tag_then_call
|
162
|
+
|
163
|
+
|
164
|
+
@overload(cuda_calls_target_overloaded, target='cuda')
def ol_cuda_calls_target_overloaded(x):
    """CUDA overload: tag ``x[0]``, then call ``target_overloaded``."""
    def tag_then_call(x):
        x[0] *= CUDA_CALLS_TARGET_OL
        target_overloaded(x)

    return tag_then_call
|
170
|
+
|
171
|
+
|
172
|
+
@overload(target_overloaded_calls_target_overloaded, target='generic')
def ol_generic_calls_target_overloaded_generic(x):
    """Generic variant of ``target_overloaded_calls_target_overloaded``.

    Tags ``x[0]`` with GENERIC_TARGET_OL_CALLS_TARGET_OL and then calls
    ``target_overloaded``.
    """
    def tag_then_call(x):
        x[0] *= GENERIC_TARGET_OL_CALLS_TARGET_OL
        target_overloaded(x)

    return tag_then_call
|
178
|
+
|
179
|
+
|
180
|
+
@overload(target_overloaded_calls_target_overloaded, target='cuda')
def ol_generic_calls_target_overloaded_cuda(x):
    """CUDA variant of ``target_overloaded_calls_target_overloaded``.

    Tags ``x[0]`` with CUDA_TARGET_OL_CALLS_TARGET_OL and then calls
    ``target_overloaded``.
    """
    def tag_then_call(x):
        x[0] *= CUDA_TARGET_OL_CALLS_TARGET_OL
        target_overloaded(x)

    return tag_then_call
|
186
|
+
|
187
|
+
|
188
|
+
@skip_on_cudasim('Overloading not supported in cudasim')
class TestOverload(CUDATestCase):
    """Check which target-specific overloads are resolved for each call.

    Every overload multiplies element 0 of its argument by its own prime
    tag, so the final product identifies which implementations ran.
    """

    def check_overload(self, kernel, expected):
        """Compile ``kernel`` for CUDA, run one thread, compare the product."""
        product = np.ones(1, dtype=np.int32)
        compiled = cuda.jit(kernel)
        compiled[1, 1](product)
        self.assertEqual(product[0], expected)

    def check_overload_cpu(self, kernel, expected):
        """Compile ``kernel`` with ``njit``, run it, compare the product."""
        product = np.ones(1, dtype=np.int32)
        compiled = njit(kernel)
        compiled(product)
        self.assertEqual(product[0], expected)

    def test_generic(self):
        """A generic-target overload is usable from a CUDA kernel."""
        def kernel(x):
            generic_func_1(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1)

    def test_cuda(self):
        """A CUDA-target overload is usable from a CUDA kernel."""
        def kernel(x):
            cuda_func_1(x)

        self.check_overload(kernel, CUDA_FUNCTION_1)

    def test_generic_and_cuda(self):
        """Generic and CUDA overloads can be mixed in one kernel."""
        def kernel(x):
            generic_func_1(x)
            cuda_func_1(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1 * CUDA_FUNCTION_1)

    def test_call_two_generic_calls(self):
        """Two distinct generic overloads are both resolved."""
        def kernel(x):
            generic_func_1(x)
            generic_func_2(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1 * GENERIC_FUNCTION_2)

    def test_call_two_cuda_calls(self):
        """Two distinct CUDA overloads are both resolved."""
        def kernel(x):
            cuda_func_1(x)
            cuda_func_2(x)

        self.check_overload(kernel, CUDA_FUNCTION_1 * CUDA_FUNCTION_2)

    def test_generic_calls_generic(self):
        """A generic overload may call another generic overload."""
        def kernel(x):
            generic_calls_generic(x)

        self.check_overload(kernel,
                            GENERIC_CALLS_GENERIC * GENERIC_FUNCTION_1)

    def test_generic_calls_cuda(self):
        """A generic overload may call a CUDA overload on the CUDA target."""
        def kernel(x):
            generic_calls_cuda(x)

        self.check_overload(kernel, GENERIC_CALLS_CUDA * CUDA_FUNCTION_1)

    def test_cuda_calls_generic(self):
        """A CUDA overload may call a generic overload."""
        def kernel(x):
            cuda_calls_generic(x)

        self.check_overload(kernel, CUDA_CALLS_GENERIC * GENERIC_FUNCTION_1)

    def test_cuda_calls_cuda(self):
        """A CUDA overload may call another CUDA overload."""
        def kernel(x):
            cuda_calls_cuda(x)

        self.check_overload(kernel, CUDA_CALLS_CUDA * CUDA_FUNCTION_1)

    def test_call_target_overloaded(self):
        """With both variants registered, CUDA kernels get the CUDA one."""
        def kernel(x):
            target_overloaded(x)

        self.check_overload(kernel, CUDA_TARGET_OL)

    def test_generic_calls_target_overloaded(self):
        """A generic caller still reaches the CUDA variant on CUDA."""
        def kernel(x):
            generic_calls_target_overloaded(x)

        self.check_overload(kernel,
                            GENERIC_CALLS_TARGET_OL * CUDA_TARGET_OL)

    def test_cuda_calls_target_overloaded(self):
        """A CUDA caller reaches the CUDA variant."""
        def kernel(x):
            cuda_calls_target_overloaded(x)

        self.check_overload(kernel, CUDA_CALLS_TARGET_OL * CUDA_TARGET_OL)

    def test_target_overloaded_calls_target_overloaded(self):
        """Per-target caller and callee resolve consistently on each target."""
        def kernel(x):
            target_overloaded_calls_target_overloaded(x)

        # Check the CUDA overloads are used on CUDA
        cuda_expected = CUDA_TARGET_OL_CALLS_TARGET_OL * CUDA_TARGET_OL
        self.check_overload(kernel, cuda_expected)

        # Also check that the CPU overloads are used on the CPU
        cpu_expected = GENERIC_TARGET_OL_CALLS_TARGET_OL * GENERIC_TARGET_OL
        self.check_overload_cpu(kernel, cpu_expected)

    def test_overload_attribute_target(self):
        """An attribute overloaded for 'cuda' only is rejected by ``njit``."""
        MyDummy, MyDummyType = self.make_dummy_type()
        mydummy_type = typeof(MyDummy())

        @overload_attribute(MyDummyType, 'cuda_only', target='cuda')
        def ov_dummy_cuda_attr(obj):
            def imp(obj):
                return 42

            return imp

        # Ensure that we cannot use the CUDA target-specific attribute on the
        # CPU, and that an appropriate typing error is raised

        # A different error is produced prior to version 0.60
        # (the fixes in #9454 improved the message)
        # https://github.com/numba/numba/pull/9454
        msg = ('resolving type of attribute "cuda_only" of "x"'
               if version_info[:2] < (0, 60)
               else "Unknown attribute 'cuda_only'")

        with self.assertRaisesRegex(TypingError, msg):
            @njit(types.int64(mydummy_type))
            def illegal_target_attr_use(x):
                return x.cuda_only

        # Ensure that the CUDA target-specific attribute is usable and works
        # correctly when the target is CUDA - note eager compilation via
        # signature
        @cuda.jit(types.void(types.int64[::1], mydummy_type))
        def cuda_target_attr_use(res, dummy):
            res[0] = dummy.cuda_only
|
332
|
+
|
333
|
+
|
334
|
+
if __name__ == '__main__':
|
335
|
+
unittest.main()
|
@@ -0,0 +1,124 @@
|
|
1
|
+
import math
|
2
|
+
import numpy as np
|
3
|
+
from numba import cuda, float64, int8, int32, void
|
4
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
5
|
+
|
6
|
+
|
7
|
+
def cu_mat_power(A, power, power_A):
    """Kernel body: ``power_A[y, x] = math.pow(A[y, x], int32(power))``.

    Threads whose 2D grid position falls outside ``power_A`` do nothing.
    """
    row, col = cuda.grid(2)
    n_rows, n_cols = power_A.shape

    if col < n_cols and row < n_rows:
        power_A[row, col] = math.pow(A[row, col], int32(power))
|
15
|
+
|
16
|
+
|
17
|
+
def cu_mat_power_binop(A, power, power_A):
    """Kernel body: ``power_A[y, x] = A[y, x] ** power`` (``**`` operator).

    Threads whose 2D grid position falls outside ``power_A`` do nothing.
    """
    row, col = cuda.grid(2)
    n_rows, n_cols = power_A.shape

    if col < n_cols and row < n_rows:
        power_A[row, col] = A[row, col] ** power
|
25
|
+
|
26
|
+
|
27
|
+
def vec_pow(r, x, y):
    """Kernel body: ``r[i] = pow(x[i], y[i])`` using the ``pow`` builtin."""
    idx = cuda.grid(1)

    # Threads past the end of the output have nothing to do.
    if idx >= len(r):
        return

    r[idx] = pow(x[idx], y[idx])
|
32
|
+
|
33
|
+
|
34
|
+
def vec_pow_binop(r, x, y):
    """Kernel body: ``r[i] = x[i] ** y[i]`` using the ``**`` operator."""
    idx = cuda.grid(1)

    # Threads past the end of the output have nothing to do.
    if idx >= len(r):
        return

    r[idx] = x[idx] ** y[idx]
|
39
|
+
|
40
|
+
|
41
|
+
def vec_pow_inplace_binop(r, x):
    """Kernel body: ``r[i] **= x[i]`` using the in-place power operator."""
    idx = cuda.grid(1)

    # Threads past the end of the array have nothing to do.
    if idx >= len(r):
        return

    r[idx] **= x[idx]
|
46
|
+
|
47
|
+
|
48
|
+
def random_complex(N):
    """Return a one-element complex array drawn from a fixed seed.

    NOTE(review): ``N`` is accepted but not used - the array always has a
    single element - and the generator is re-seeded with 123 on every call,
    so successive calls return the same value. Confirm whether callers
    intend N independent samples before changing this; their tolerances are
    currently exercised against this single value only.
    """
    np.random.seed(123)
    real_part = np.random.random(1)
    imag_part = np.random.random(1)
    return real_part + imag_part * 1j
|
51
|
+
|
52
|
+
|
53
|
+
class TestCudaPowi(CUDATestCase):
    """Power operations in CUDA kernels: integer exponents and complex pow."""

    def _check_powi(self, pyfunc):
        """Compile ``pyfunc`` with an int8 exponent and compare to NumPy."""
        signature = void(float64[:, :], int8, float64[:, :])
        kernel = cuda.jit(signature)(pyfunc)

        power = 2
        A = np.arange(10, dtype=np.float64).reshape(2, 5)
        Aout = np.empty_like(A)
        # One block whose dimensions are taken from the shape of A
        kernel[1, A.shape](A, power, Aout)
        self.assertTrue(np.allclose(Aout, A ** power))

    def test_powi(self):
        self._check_powi(cu_mat_power)

    def test_powi_binop(self):
        self._check_powi(cu_mat_power_binop)

    # Relative tolerance kwarg is provided because 1.0e-7 (the default for
    # assert_allclose) is a bit tight for single precision.
    def _test_cpow(self, dtype, func, rtol=1.0e-7):
        """Run the 3-argument kernel ``func`` on complex input of ``dtype``."""
        N = 32
        base = random_complex(N).astype(dtype)
        exponent = random_complex(N).astype(dtype)
        result = np.zeros_like(base)

        cfunc = cuda.jit(func)
        cfunc[1, N](result, base, exponent)
        np.testing.assert_allclose(result, base ** exponent, rtol=rtol)

        # Checks special cases
        base = np.asarray([0.0j, 1.0j], dtype=dtype)
        exponent = np.asarray([0.0j, 1.0], dtype=dtype)
        result = np.zeros_like(base)

        cfunc[1, 2](result, base, exponent)
        np.testing.assert_allclose(result, base ** exponent, rtol=rtol)

    def test_cpow_complex64_pow(self):
        self._test_cpow(np.complex64, vec_pow, rtol=3.0e-7)

    def test_cpow_complex64_binop(self):
        self._test_cpow(np.complex64, vec_pow_binop, rtol=3.0e-7)

    def test_cpow_complex128_pow(self):
        self._test_cpow(np.complex128, vec_pow)

    def test_cpow_complex128_binop(self):
        self._test_cpow(np.complex128, vec_pow_binop)

    def _test_cpow_inplace_binop(self, dtype, rtol=1.0e-7):
        """Check ``vec_pow_inplace_binop`` against NumPy for ``dtype``."""
        N = 32
        base = random_complex(N).astype(dtype)
        exponent = random_complex(N).astype(dtype)
        # Reference is computed first: the kernel overwrites ``base``.
        reference = base ** exponent

        cfunc = cuda.jit(vec_pow_inplace_binop)
        cfunc[1, N](base, exponent)
        np.testing.assert_allclose(base, reference, rtol=rtol)

    def test_cpow_complex64_inplace_binop(self):
        self._test_cpow_inplace_binop(np.complex64, rtol=3.0e-7)

    def test_cpow_complex128_inplace_binop(self):
        self._test_cpow_inplace_binop(np.complex128, rtol=3.0e-7)
|
121
|
+
|
122
|
+
|
123
|
+
if __name__ == '__main__':
|
124
|
+
unittest.main()
|
@@ -0,0 +1,128 @@
|
|
1
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
2
|
+
import subprocess
|
3
|
+
import sys
|
4
|
+
import unittest
|
5
|
+
|
6
|
+
|
7
|
+
# Script run in a subprocess: six threads (2 blocks x 3 threads) each print
# their grid index followed by 999, then -42.
cuhello_usecase = """\
from numba import cuda

@cuda.jit
def cuhello():
    i = cuda.grid(1)
    print(i, 999)
    print(-42)

cuhello[2, 3]()
cuda.synchronize()
"""
|
19
|
+
|
20
|
+
|
21
|
+
# Script run in a subprocess: a single thread prints a mix of integers and
# a float in one print() call.
printfloat_usecase = """\
from numba import cuda

@cuda.jit
def printfloat():
    i = cuda.grid(1)
    print(i, 23, 34.75, 321)

printfloat[1, 1]()
cuda.synchronize()
"""
|
32
|
+
|
33
|
+
|
34
|
+
# Script run in a subprocess: three threads each print their grid index, a
# string literal, and 999.
printstring_usecase = """\
from numba import cuda

@cuda.jit
def printstring():
    i = cuda.grid(1)
    print(i, "hop!", 999)

printstring[1, 3]()
cuda.synchronize()
"""
|
45
|
+
|
46
|
+
# Script run in a subprocess: a single thread calls print() with no
# arguments.
printempty_usecase = """\
from numba import cuda

@cuda.jit
def printempty():
    print()

printempty[1, 1]()
cuda.synchronize()
"""
|
56
|
+
|
57
|
+
|
58
|
+
# Script run in a subprocess: a single thread passes 33 items to one
# print() call.
print_too_many_usecase = """\
from numba import cuda
import numpy as np

@cuda.jit
def print_too_many(r):
    print(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9], r[10],
          r[11], r[12], r[13], r[14], r[15], r[16], r[17], r[18], r[19], r[20],
          r[21], r[22], r[23], r[24], r[25], r[26], r[27], r[28], r[29], r[30],
          r[31], r[32])

print_too_many[1, 1](np.arange(33))
cuda.synchronize()
"""
|
72
|
+
|
73
|
+
|
74
|
+
class TestPrint(CUDATestCase):
    """Checks the text produced by ``print()`` calls inside CUDA kernels.

    Each usecase script is executed in a fresh interpreter so that its
    stdout and stderr can be captured.
    """
    # Note that in these tests we generally strip the output to avoid dealing
    # with platform-specific line ending issues, e.g. '\r\n' vs '\n' etc.

    def run_code(self, code):
        """Runs code in a subprocess and returns the captured output"""
        # check=True raises CalledProcessError on a non-zero exit status;
        # timeout=60 raises TimeoutExpired if the child takes too long.
        completed = subprocess.run(
            [sys.executable, "-c", code],
            timeout=60,
            capture_output=True,
            check=True,
        )
        return completed.stdout.decode(), completed.stderr.decode()

    def test_cuhello(self):
        stdout, _ = self.run_code(cuhello_usecase)
        actual = [line.strip() for line in stdout.splitlines()]
        # Six threads run; '-42' sorts before the '<index> 999' lines.
        expected = ['-42'] * 6
        expected += ['%d 999' % i for i in range(6)]
        # The output of GPU threads is intermingled, but each print()
        # call is still atomic
        self.assertEqual(sorted(actual), expected)

    def test_printfloat(self):
        stdout, _ = self.run_code(printfloat_usecase)
        # CUDA and the simulator use different formats for float formatting
        expected_cases = ["0 23 34.750000 321", "0 23 34.75 321"]
        self.assertIn(stdout.strip(), expected_cases)

    def test_printempty(self):
        stdout, _ = self.run_code(printempty_usecase)
        self.assertEqual(stdout.strip(), "")

    def test_string(self):
        stdout, _ = self.run_code(printstring_usecase)
        lines = [line.strip() for line in stdout.splitlines(True)]
        expected = ['%d hop! 999' % i for i in range(3)]
        self.assertEqual(sorted(lines), expected)

    @skip_on_cudasim('cudasim can print unlimited output')
    def test_too_many_args(self):
        # Tests that we emit the format string and warn when there are more
        # than 32 arguments, in common with CUDA C/C++ printf - this is due to
        # a limitation in CUDA vprintf, see:
        # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#limitations

        stdout, stderr = self.run_code(print_too_many_usecase)

        # Check that the format string was printed instead of formatted garbage
        expected_fmt_string = ' '.join(['%lld'] * 33)
        self.assertIn(expected_fmt_string, stdout)

        # Check for the expected warning about formatting more than 32 items
        warn_msg = ('CUDA print() cannot print more than 32 items. The raw '
                    'format string will be emitted by the kernel instead.')
        self.assertIn(warn_msg, stderr)
|
125
|
+
|
126
|
+
|
127
|
+
if __name__ == '__main__':
|
128
|
+
unittest.main()
|
@@ -0,0 +1,33 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda, float32, int32, void
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
|
5
|
+
|
6
|
+
class TestCudaPy2Div(CUDATestCase):
    """Regression test: float32 multiply/divide by 1.0 must be a no-op."""

    def test_py2_div_issue(self):
        # The kernel body is kept statement-for-statement: per the comment
        # inside it, even the unused local may matter for reproducing the bug.
        @cuda.jit(void(float32[:], float32[:], float32[:], int32))
        def preCalc(y, yA, yB, numDataPoints):
            i = cuda.grid(1)
            # k is unused, but may be part of the trigger for the bug this
            # tests for.
            k = i % numDataPoints  # noqa: F841

            ans = float32(1.001 * float32(i))

            y[i] = ans
            yA[i] = ans * 1.0
            yB[i] = ans / 1.0

        numDataPoints = 15

        # Three separate zero-initialised float32 outputs, one per store.
        y, yA, yB = (np.zeros(numDataPoints, dtype=np.float32)
                     for _ in range(3))
        preCalc[1, 15](y, yA, yB, numDataPoints)

        # Both derived outputs must match the reference exactly.
        for derived in (yA, yB):
            self.assertTrue(np.all(y == derived))
|
30
|
+
|
31
|
+
|
32
|
+
if __name__ == '__main__':
|
33
|
+
unittest.main()
|