numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py
@@ -0,0 +1,187 @@
+import numpy as np
+from numba.core import types
+from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
+from numba import cuda
+from numba.cuda import libdevice, compile_ptx
+from numba.cuda.libdevicefuncs import functions, create_signature
+
+
+def use_sincos(s, c, x):
+    i = cuda.grid(1)
+
+    if i < len(x):
+        sr, cr = libdevice.sincos(x[i])
+        s[i] = sr
+        c[i] = cr
+
+
+def use_frexp(frac, exp, x):
+    i = cuda.grid(1)
+
+    if i < len(x):
+        fracr, expr = libdevice.frexp(x[i])
+        frac[i] = fracr
+        exp[i] = expr
+
+
+def use_sad(r, x, y, z):
+    i = cuda.grid(1)
+
+    if i < len(x):
+        r[i] = libdevice.sad(x[i], y[i], z[i])
+
+
+@skip_on_cudasim('Libdevice functions are not supported on cudasim')
+class TestLibdevice(CUDATestCase):
+    """
+    Some tests of libdevice function wrappers that check the returned values.
+
+    These are mainly to check that the generation of the implementations
+    results in correct typing and lowering for each type of function return
+    (e.g. scalar return, UniTuple return, Tuple return, etc.).
+    """
+
+    def test_sincos(self):
+        # Tests return of a UniTuple from a libdevice function
+        arr = np.arange(100, dtype=np.float64)
+        sres = np.zeros_like(arr)
+        cres = np.zeros_like(arr)
+
+        cufunc = cuda.jit(use_sincos)
+        cufunc[4, 32](sres, cres, arr)
+
+        np.testing.assert_allclose(np.cos(arr), cres)
+        np.testing.assert_allclose(np.sin(arr), sres)
+
+    def test_frexp(self):
+        # Test return of a Tuple from a libdevice function
+        arr = np.linspace(start=1.0, stop=10.0, num=100, dtype=np.float64)
+        fracres = np.zeros_like(arr)
+        expres = np.zeros(shape=arr.shape, dtype=np.int32)
+
+        cufunc = cuda.jit(use_frexp)
+        cufunc[4, 32](fracres, expres, arr)
+
+        frac_expect, exp_expect = np.frexp(arr)
+
+        np.testing.assert_array_equal(frac_expect, fracres)
+        np.testing.assert_array_equal(exp_expect, expres)
+
+    def test_sad(self):
+        # Test return of a scalar from a libdevice function
+        x = np.arange(0, 200, 2)
+        y = np.arange(50, 150)
+        z = np.arange(15, 115)
+        r = np.zeros_like(x)
+
+        cufunc = cuda.jit(use_sad)
+        cufunc[4, 32](r, x, y, z)
+
+        np.testing.assert_array_equal(np.abs(x - y) + z, r)
+
+
+# A template for generating tests of compiling calls to libdevice functions.
+# The purpose of the call and assignment of the return variables is to ensure
+# the actual function implementations are not thrown away resulting in a PTX
+# implementation that only contains the ret instruction - this may hide certain
+# errors.
+function_template = """\
+from numba.cuda import libdevice
+
+def pyfunc(%(pyargs)s):
+    ret = libdevice.%(func)s(%(funcargs)s)
+    %(retvars)s = ret
+"""
+
+
+def make_test_call(libname):
+    """
+    Generates a test function for each libdevice function.
+    """
+
+    def _test_call_functions(self):
+        # Strip off '__nv_' from libdevice name to get Python name
+        apiname = libname[5:]
+        apifunc = getattr(libdevice, apiname)
+        retty, args = functions[libname]
+        sig = create_signature(retty, args)
+
+        # Construct arguments to the libdevice function. These are all
+        # non-pointer arguments to the underlying bitcode function.
+        funcargs = ", ".join(['a%d' % i for i, arg in enumerate(args) if not
+                              arg.is_ptr])
+
+        # Arguments to the Python function (`pyfunc` in the template above) are
+        # the arguments to the libdevice function, plus as many extra arguments
+        # as there are in the return type of the libdevice function - one for
+        # scalar-valued returns, or the length of the tuple for tuple-valued
+        # returns.
+        if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
+            # Start with the parameters for the return values
+            pyargs = ", ".join(['r%d' % i for i in
+                                range(len(sig.return_type))])
+            # Add the parameters for the argument values
+            pyargs += ", " + funcargs
+            # Generate the unpacking of the return value from the libdevice
+            # function into the Python function return values (`r0`, `r1`,
+            # etc.).
+            retvars = ", ".join(['r%d[0]' % i for i in
+                                 range(len(sig.return_type))])
+        else:
+            # Scalar return is a more straightforward case
+            pyargs = "r0, " + funcargs
+            retvars = "r0[0]"
+
+        # Create the string containing the function to compile
+        d = { 'func': apiname,
+              'pyargs': pyargs,
+              'funcargs': funcargs,
+              'retvars': retvars }
+        code = function_template % d
+
+        # Convert the string to a Python function
+        locals = {}
+        exec(code, globals(), locals)
+        pyfunc = locals['pyfunc']
+
+        # Compute the signature for compilation. This mirrors the creation of
+        # arguments to the Python function above.
+        pyargs = [ arg.ty for arg in args if not arg.is_ptr ]
+        if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
+            pyreturns = [ret[::1] for ret in sig.return_type]
+            pyargs = pyreturns + pyargs
+        else:
+            pyargs.insert(0, sig.return_type[::1])
+
+        pyargs = tuple(pyargs)
+        ptx, resty = compile_ptx(pyfunc, pyargs)
+
+        # If the function body was discarded by optimization (therefore making
+        # the test a bit weak), there won't be any loading of parameters -
+        # ensure that a load from parameters occurs somewhere in the PTX
+        self.assertIn('ld.param', ptx)
+
+        # Returning the result (through a passed-in array) should also require
+        # a store to global memory, so check for at least one of those too.
+        self.assertIn('st.global', ptx)
+
+    return _test_call_functions
+
+
+@skip_on_cudasim('Compilation to PTX is not supported on cudasim')
+class TestLibdeviceCompilation(unittest.TestCase):
+    """
+    Class for holding all tests of compiling calls to libdevice functions. We
+    generate the actual tests in this class (as opposed to using subTest and
+    one test within this class) because there are a lot of tests, and it makes
+    the test suite appear frozen to test them all as subTests in one test.
+    """
+
+
+for libname in functions:
+    setattr(TestLibdeviceCompilation, 'test_%s' % libname,
+            make_test_call(libname))
+
+
+if __name__ == '__main__':
+    unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py
@@ -0,0 +1,199 @@
+from numba import cuda, float32, int32
+from numba.core.errors import NumbaInvalidConfigWarning
+from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.tests.support import ignore_internal_warnings
+import re
+import unittest
+import warnings
+
+
+@skip_on_cudasim('Simulator does not produce lineinfo')
+class TestCudaLineInfo(CUDATestCase):
+    def _loc_directive_regex(self):
+        # This is used in several tests
+
+        pat = (
+            r'\.loc'      # .loc directive beginning
+            r'\s+[0-9]+'  # whitespace then file index
+            r'\s+[0-9]+'  # whitespace then line number
+            r'\s+[0-9]+'  # whitespace then column position
+        )
+        return re.compile(pat)
+
+    def _check(self, fn, sig, expect):
+        fn.compile(sig)
+        llvm = fn.inspect_llvm(sig)
+        ptx = fn.inspect_asm(sig)
+        assertfn = self.assertIsNotNone if expect else self.assertIsNone
+
+        # DICompileUnit debug info metadata should all be of the
+        # DebugDirectivesOnly kind, and not the FullDebug kind
+        pat = (
+            r'!DICompileUnit\(.*'   # Opening of DICompileUnit metadata. Since
+                                    # the order of attributes is not
+                                    # guaranteed, we need to match arbitrarily
+                                    # afterwards.
+            r'emissionKind:\s+'     # The emissionKind attribute followed by
+                                    # whitespace.
+            r'DebugDirectivesOnly'  # The correct emissionKind.
+        )
+        match = re.compile(pat).search(llvm)
+        assertfn(match, msg=ptx)
+
+        pat = (
+            r'!DICompileUnit\(.*'  # Same as the pattern above, but for the
+            r'emissionKind:\s+'    # incorrect FullDebug emissionKind.
+            r'FullDebug'           #
+        )
+        match = re.compile(pat).search(llvm)
+        self.assertIsNone(match, msg=ptx)
+
+        # The name of this file should be present in the line mapping
+        # if lineinfo was propagated through correctly.
+        pat = (
+            r'\.file'                # .file directive beginning
+            r'\s+[0-9]+\s+'          # file number surrounded by whitespace
+            r'".*test_lineinfo.py"'  # filename in quotes, ignoring full path
+        )
+        match = re.compile(pat).search(ptx)
+        assertfn(match, msg=ptx)
+
+        # .loc directives should be present in the ptx
+        self._loc_directive_regex().search(ptx)
+        assertfn(match, msg=ptx)
+
+        # Debug info sections should not be present when only lineinfo is
+        # generated
+        pat = (
+            r'\.section\s+'  # .section directive beginning
+            r'\.debug_info'  # Section named ".debug_info"
+        )
+        match = re.compile(pat).search(ptx)
+        self.assertIsNone(match, msg=ptx)
+
+    def test_no_lineinfo_in_asm(self):
+        @cuda.jit(lineinfo=False)
+        def foo(x):
+            x[0] = 1
+
+        self._check(foo, sig=(int32[:],), expect=False)
+
+    def test_lineinfo_in_asm(self):
+        @cuda.jit(lineinfo=True)
+        def foo(x):
+            x[0] = 1
+
+        self._check(foo, sig=(int32[:],), expect=True)
+
+    def test_lineinfo_maintains_error_model(self):
+        sig = (float32[::1], float32[::1])
+
+        @cuda.jit(sig, lineinfo=True)
+        def divide_kernel(x, y):
+            x[0] /= y[0]
+
+        llvm = divide_kernel.inspect_llvm(sig)
+
+        # When the error model is Python, the device function returns 1 to
+        # signal an exception (e.g. divide by zero) has occurred. When the
+        # error model is the default NumPy one (as it should be when only
+        # lineinfo is enabled) the device function always returns 0.
+        self.assertNotIn('ret i32 1', llvm)
+
+    def test_no_lineinfo_in_device_function(self):
+        # Ensure that no lineinfo is generated in device functions by default.
+        @cuda.jit
+        def callee(x):
+            x[0] += 1
+
+        @cuda.jit
+        def caller(x):
+            x[0] = 1
+            callee(x)
+
+        sig = (int32[:],)
+        self._check(caller, sig=sig, expect=False)
+
+    def test_lineinfo_in_device_function(self):
+        # First we define a device function / kernel pair and run the usual
+        # checks on the generated LLVM and PTX.
+
+        @cuda.jit(lineinfo=True)
+        def callee(x):
+            x[0] += 1
+
+        @cuda.jit(lineinfo=True)
+        def caller(x):
+            x[0] = 1
+            callee(x)
+
+        sig = (int32[:],)
+        self._check(caller, sig=sig, expect=True)
+
+        # Now we can check the PTX of the device function specifically.
+
+        ptx = caller.inspect_asm(sig)
+        ptxlines = ptx.splitlines()
+
+        # Check that there is no device function in the PTX
+
+        # A line beginning with ".weak .func" that identifies a device function
+        devfn_start = re.compile(r'^\.weak\s+\.func')
+
+        for line in ptxlines:
+            if devfn_start.match(line) is not None:
+                self.fail(f"Found device function in PTX:\n\n{ptx}")
+
+        # Scan for .loc directives that refer to an inlined device function
+
+        loc_directive = self._loc_directive_regex()
+        found = False
+
+        for line in ptxlines:
+            if loc_directive.search(line) is not None:
+                if 'inlined_at' in line:
+                    found = True
+                    break
+
+        if not found:
+            self.fail(f'No .loc directive with inlined_at info found'
+                      f'in:\n\n{ptx}')
+
+        # We also inspect the LLVM to ensure that there's debug info for each
+        # subprogram (function). A lightweight way to check this is to ensure
+        # that we have as many DISubprograms as we expect.
+
+        llvm = caller.inspect_llvm(sig)
+        subprograms = 0
+        for line in llvm.splitlines():
+            if 'distinct !DISubprogram' in line:
+                subprograms += 1
+
+        # One DISubprogram for each of:
+        # - The kernel wrapper
+        # - The caller
+        # - The callee
+        expected_subprograms = 3
+
+        self.assertEqual(subprograms, expected_subprograms,
+                         f'"Expected {expected_subprograms} DISubprograms; '
+                         f'got {subprograms}')
+
+    def test_debug_and_lineinfo_warning(self):
+        with warnings.catch_warnings(record=True) as w:
+            ignore_internal_warnings()
+
+            # We pass opt=False to prevent the warning about opt and debug
+            # occurring as well
+            @cuda.jit(debug=True, lineinfo=True, opt=False)
+            def f():
+                pass
+
+        self.assertEqual(len(w), 1)
+        self.assertEqual(w[0].category, NumbaInvalidConfigWarning)
+        self.assertIn('debug and lineinfo are mutually exclusive',
+                      str(w[0].message))
+
+
+if __name__ == '__main__':
+    unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_localmem.py
@@ -0,0 +1,164 @@
+import numpy as np
+
+from numba import cuda, int32, complex128, void
+from numba.core import types
+from numba.core.errors import TypingError
+from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from .extensions_usecases import test_struct_model_type, TestStruct
+
+
+def culocal(A, B):
+    C = cuda.local.array(1000, dtype=int32)
+    for i in range(C.shape[0]):
+        C[i] = A[i]
+    for i in range(C.shape[0]):
+        B[i] = C[i]
+
+
+def culocalcomplex(A, B):
+    C = cuda.local.array(100, dtype=complex128)
+    for i in range(C.shape[0]):
+        C[i] = A[i]
+    for i in range(C.shape[0]):
+        B[i] = C[i]
+
+
+def culocal1tuple(A, B):
+    C = cuda.local.array((5,), dtype=int32)
+    for i in range(C.shape[0]):
+        C[i] = A[i]
+    for i in range(C.shape[0]):
+        B[i] = C[i]
+
+
+@skip_on_cudasim('PTX inspection not available in cudasim')
+class TestCudaLocalMem(CUDATestCase):
+    def test_local_array(self):
+        sig = (int32[:], int32[:])
+        jculocal = cuda.jit(sig)(culocal)
+        self.assertTrue('.local' in jculocal.inspect_asm(sig))
+        A = np.arange(1000, dtype='int32')
+        B = np.zeros_like(A)
+        jculocal[1, 1](A, B)
+        self.assertTrue(np.all(A == B))
+
+    def test_local_array_1_tuple(self):
+        """Ensure that local arrays can be constructed with 1-tuple shape
+        """
+        jculocal = cuda.jit('void(int32[:], int32[:])')(culocal1tuple)
+        # Don't check if .local is in the ptx because the optimizer
+        # may reduce it to registers.
+        A = np.arange(5, dtype='int32')
+        B = np.zeros_like(A)
+        jculocal[1, 1](A, B)
+        self.assertTrue(np.all(A == B))
+
+    def test_local_array_complex(self):
+        sig = 'void(complex128[:], complex128[:])'
+        jculocalcomplex = cuda.jit(sig)(culocalcomplex)
+        A = (np.arange(100, dtype='complex128') - 1) / 2j
+        B = np.zeros_like(A)
+        jculocalcomplex[1, 1](A, B)
+        self.assertTrue(np.all(A == B))
+
+    def check_dtype(self, f, dtype):
+        # Find the typing of the dtype argument to cuda.local.array
+        annotation = next(iter(f.overloads.values()))._type_annotation
+        l_dtype = annotation.typemap['l'].dtype
+        # Ensure that the typing is correct
+        self.assertEqual(l_dtype, dtype)
+
+    @skip_on_cudasim("Can't check typing in simulator")
+    def test_numba_dtype(self):
+        # Check that Numba types can be used as the dtype of a local array
+        @cuda.jit(void(int32[::1]))
+        def f(x):
+            l = cuda.local.array(10, dtype=int32)
+            l[0] = x[0]
+            x[0] = l[0]
+
+        self.check_dtype(f, int32)
+
+    @skip_on_cudasim("Can't check typing in simulator")
+    def test_numpy_dtype(self):
+        # Check that NumPy types can be used as the dtype of a local array
+        @cuda.jit(void(int32[::1]))
+        def f(x):
+            l = cuda.local.array(10, dtype=np.int32)
+            l[0] = x[0]
+            x[0] = l[0]
+
+        self.check_dtype(f, int32)
+
+    @skip_on_cudasim("Can't check typing in simulator")
+    def test_string_dtype(self):
+        # Check that strings can be used to specify the dtype of a local array
+        @cuda.jit(void(int32[::1]))
+        def f(x):
+            l = cuda.local.array(10, dtype='int32')
+            l[0] = x[0]
+            x[0] = l[0]
+
+        self.check_dtype(f, int32)
+
+    @skip_on_cudasim("Can't check typing in simulator")
+    def test_invalid_string_dtype(self):
+        # Check that strings of invalid dtypes cause a typing error
+        re = ".*Invalid NumPy dtype specified: 'int33'.*"
+        with self.assertRaisesRegex(TypingError, re):
+            @cuda.jit(void(int32[::1]))
+            def f(x):
+                l = cuda.local.array(10, dtype='int33')
+                l[0] = x[0]
+                x[0] = l[0]
+
+    def test_type_with_struct_data_model(self):
+        @cuda.jit(void(test_struct_model_type[::1]))
+        def f(x):
+            l = cuda.local.array(10, dtype=test_struct_model_type)
+            l[0] = x[0]
+            x[0] = l[0]
+
+        self.check_dtype(f, test_struct_model_type)
+
+    def test_struct_model_type_arr(self):
+        @cuda.jit(void(int32[::1], int32[::1]))
+        def f(outx, outy):
+            # Test creation
+            arr = cuda.local.array(10, dtype=test_struct_model_type)
+            # Test set to arr
+            for i in range(len(arr)):
+                obj = TestStruct(int32(i), int32(i * 2))
+                arr[i] = obj
+            # Test get from arr
+            for i in range(len(arr)):
+                outx[i] = arr[i].x
+                outy[i] = arr[i].y
+
+        arrx = np.array((10,), dtype="int32")
+        arry = np.array((10,), dtype="int32")
+
+        f[1, 1](arrx, arry)
+
+        for i, x in enumerate(arrx):
+            self.assertEqual(x, i)
+        for i, y in enumerate(arry):
+            self.assertEqual(y, i * 2)
+
+    def _check_local_array_size_fp16(self, shape, expected, ty):
+        @cuda.jit
+        def s(a):
+            arr = cuda.local.array(shape, dtype=ty)
+            a[0] = arr.size
+
+        result = np.zeros(1, dtype=np.float16)
+        s[1, 1](result)
+        self.assertEqual(result[0], expected)
+
+    def test_issue_fp16_support(self):
+        self._check_local_array_size_fp16(2, 2, types.float16)
+        self._check_local_array_size_fp16(2, 2, np.float16)
+
+
+if __name__ == '__main__':
+    unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_mandel.py
@@ -0,0 +1,37 @@
+from numba import float64, uint32
+from numba.cuda.compiler import compile_ptx
+from numba.cuda.testing import skip_on_cudasim, unittest
+
+
+@skip_on_cudasim('Compilation unsupported in the simulator')
+class TestCudaMandel(unittest.TestCase):
+    def test_mandel(self):
+        """Just make sure we can compile this
+        """
+
+        def mandel(tid, min_x, max_x, min_y, max_y, width, height, iters):
+            pixel_size_x = (max_x - min_x) / width
+            pixel_size_y = (max_y - min_y) / height
+
+            x = tid % width
+            y = tid / width
+
+            real = min_x + x * pixel_size_x
+            imag = min_y + y * pixel_size_y
+
+            c = complex(real, imag)
+            z = 0.0j
+
+            for i in range(iters):
+                z = z * z + c
+                if (z.real * z.real + z.imag * z.imag) >= 4:
+                    return i
+            return iters
+
+        args = (uint32, float64, float64, float64, float64,
+                uint32, uint32, uint32)
+        compile_ptx(mandel, args, device=True)
+
+
+if __name__ == '__main__':
+    unittest.main()