numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,244 @@
|
|
1
|
+
from typing import List
|
2
|
+
from dataclasses import dataclass, field
|
3
|
+
from numba import cuda, float32
|
4
|
+
from numba.cuda.compiler import compile_ptx_for_current_device, compile_ptx
|
5
|
+
from math import cos, sin, tan, exp, log, log10, log2, pow, tanh
|
6
|
+
from operator import truediv
|
7
|
+
import numpy as np
|
8
|
+
from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
|
9
|
+
skip_unless_cc_75)
|
10
|
+
import unittest
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class FastMathCriterion:
    """Substrings that must, or must not, occur in generated code.

    The ``fast_*`` lists are applied to the output compiled with fastmath
    enabled and the ``prec_*`` lists to the output compiled without it.
    Every list defaults to empty, which makes the corresponding check a
    no-op.
    """
    # Substrings that must appear in the fastmath output.
    fast_expected: List[str] = field(default_factory=list)
    # Substrings that must not appear in the fastmath output.
    fast_unexpected: List[str] = field(default_factory=list)
    # Substrings that must appear in the precise (non-fastmath) output.
    prec_expected: List[str] = field(default_factory=list)
    # Substrings that must not appear in the precise (non-fastmath) output.
    prec_unexpected: List[str] = field(default_factory=list)

    def check(self, test: CUDATestCase, fast: str, prec: str):
        """Assert that ``fast`` and ``prec`` satisfy this criterion.

        :param test: test case whose assertion methods are used, so that a
                     violation is reported as a failure of that test.
        :param fast: code generated with fastmath enabled.
        :param prec: code generated without fastmath.
        """
        # Each substring is asserted individually (rather than through
        # ``assertTrue(all(...))``) so that a failure names the offending
        # substring and shows the code that was searched.
        for needle in self.fast_expected:
            test.assertIn(needle, fast, "missing from fastmath output")
        for needle in self.fast_unexpected:
            test.assertNotIn(needle, fast, "unexpected in fastmath output")
        for needle in self.prec_expected:
            test.assertIn(needle, prec, "missing from precise output")
        for needle in self.prec_unexpected:
            test.assertNotIn(needle, prec, "unexpected in precise output")
|
25
|
+
|
26
|
+
|
27
|
+
@skip_on_cudasim('Fastmath and PTX inspection not available on cudasim')
class TestFastMathOption(CUDATestCase):
    # Each test compiles a small function with and without ``fastmath=True``
    # and uses a FastMathCriterion to look for (or rule out) specific
    # instruction substrings in the two generated outputs.

    def _test_fast_math_common(self, pyfunc, sig, device, criterion):
        # Compile ``pyfunc`` for ``sig`` with and without fastmath through
        # two entry points (``cuda.jit`` and
        # ``compile_ptx_for_current_device``) and apply ``criterion`` to each
        # fast/precise pair. ``device`` is forwarded to both entry points.

        # Test jit code path
        fastver = cuda.jit(sig, device=device, fastmath=True)(pyfunc)
        precver = cuda.jit(sig, device=device)(pyfunc)

        criterion.check(
            self, fastver.inspect_asm(sig), precver.inspect_asm(sig)
        )

        # Test compile_ptx code path
        fastptx, _ = compile_ptx_for_current_device(
            pyfunc, sig, device=device, fastmath=True
        )
        precptx, _ = compile_ptx_for_current_device(
            pyfunc, sig, device=device
        )

        criterion.check(self, fastptx, precptx)

    def _test_fast_math_unary(self, op, criterion: FastMathCriterion):
        # Exercise the one-argument ``op`` both as a kernel (result written
        # to ``r[0]``) and as a device function (result returned).
        def kernel(r, x):
            r[0] = op(x)

        def device_function(x):
            return op(x)

        self._test_fast_math_common(
            kernel, (float32[::1], float32), device=False, criterion=criterion
        )
        self._test_fast_math_common(
            device_function, (float32,), device=True, criterion=criterion
        )

    def _test_fast_math_binary(self, op, criterion: FastMathCriterion):
        # Two-argument counterpart of _test_fast_math_unary.
        def kernel(r, x, y):
            r[0] = op(x, y)

        # Named ``device_function`` to match _test_fast_math_unary.
        def device_function(x, y):
            return op(x, y)

        self._test_fast_math_common(
            kernel,
            (float32[::1], float32, float32), device=False, criterion=criterion
        )
        self._test_fast_math_common(
            device_function, (float32, float32), device=True,
            criterion=criterion
        )

    def test_cosf(self):
        self._test_fast_math_unary(
            cos,
            FastMathCriterion(
                fast_expected=['cos.approx.ftz.f32 '],
                prec_unexpected=['cos.approx.ftz.f32 ']
            )
        )

    def test_sinf(self):
        self._test_fast_math_unary(
            sin,
            FastMathCriterion(
                fast_expected=['sin.approx.ftz.f32 '],
                prec_unexpected=['sin.approx.ftz.f32 ']
            )
        )

    def test_tanf(self):
        self._test_fast_math_unary(
            tan,
            FastMathCriterion(fast_expected=[
                'sin.approx.ftz.f32 ',
                'cos.approx.ftz.f32 ',
                'div.approx.ftz.f32 '
            ], prec_unexpected=['sin.approx.ftz.f32 '])
        )

    @skip_unless_cc_75
    def test_tanhf(self):

        self._test_fast_math_unary(
            tanh,
            FastMathCriterion(
                fast_expected=['tanh.approx.f32 '],
                prec_unexpected=['tanh.approx.f32 ']
            )
        )

    def test_tanhf_compile_ptx(self):
        # Uses compile_ptx with an explicit ``cc`` so that the output for
        # two different compute capabilities can be checked.
        def tanh_kernel(r, x):
            r[0] = tanh(x)

        def tanh_common_test(cc, criterion):
            fastptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32),
                                     fastmath=True, cc=cc)
            precptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32),
                                     cc=cc)
            criterion.check(self, fastptx, precptx)

        tanh_common_test(cc=(7, 5), criterion=FastMathCriterion(
            fast_expected=['tanh.approx.f32 '],
            prec_unexpected=['tanh.approx.f32 ']
        ))

        tanh_common_test(cc=(7, 0),
                         criterion=FastMathCriterion(
                             fast_expected=['ex2.approx.ftz.f32 ',
                                            'rcp.approx.ftz.f32 '],
                             prec_unexpected=['tanh.approx.f32 ']))

    def test_expf(self):
        self._test_fast_math_unary(
            exp,
            FastMathCriterion(
                fast_unexpected=['fma.rn.f32 '],
                prec_expected=['fma.rn.f32 ']
            )
        )

    def test_logf(self):
        # Look for constant used to convert from log base 2 to log base e
        self._test_fast_math_unary(
            log, FastMathCriterion(
                fast_expected=['lg2.approx.ftz.f32 ', '0f3F317218'],
                prec_unexpected=['lg2.approx.ftz.f32 '],
            )
        )

    def test_log10f(self):
        # Look for constant used to convert from log base 2 to log base 10
        self._test_fast_math_unary(
            log10, FastMathCriterion(
                fast_expected=['lg2.approx.ftz.f32 ', '0f3E9A209B'],
                prec_unexpected=['lg2.approx.ftz.f32 ']
            )
        )

    def test_log2f(self):
        self._test_fast_math_unary(
            log2, FastMathCriterion(
                fast_expected=['lg2.approx.ftz.f32 '],
                prec_unexpected=['lg2.approx.ftz.f32 ']
            )
        )

    def test_powf(self):
        self._test_fast_math_binary(
            pow, FastMathCriterion(
                fast_expected=['lg2.approx.ftz.f32 '],
                prec_unexpected=['lg2.approx.ftz.f32 '],
            )
        )

    def test_divf(self):
        self._test_fast_math_binary(
            truediv, FastMathCriterion(
                fast_expected=['div.approx.ftz.f32 '],
                fast_unexpected=['div.rn.f32'],
                prec_expected=['div.rn.f32'],
                prec_unexpected=['div.approx.ftz.f32 '],
            )
        )

    def test_divf_exception(self):
        # LTO optimizes away the exception status due to an oversight
        # in the way we generate it (it is not added to the used list).
        self.skip_if_lto("Exceptions not supported with LTO")

        def f10(r, x, y):
            r[0] = x / y

        sig = (float32[::1], float32, float32)
        fastver = cuda.jit(sig, fastmath=True, debug=True)(f10)
        precver = cuda.jit(sig, debug=True)(f10)
        nelem = 10
        ary = np.empty(nelem, dtype=np.float32)
        # The precise, debug-enabled kernel must report division by zero...
        with self.assertRaises(ZeroDivisionError):
            precver[1, nelem](ary, 10.0, 0.0)

        # ...whereas the fastmath kernel must not.
        try:
            fastver[1, nelem](ary, 10.0, 0.0)
        except ZeroDivisionError:
            self.fail("Divide in fastmath should not throw ZeroDivisionError")

    @unittest.expectedFailure
    def test_device_fastmath_propagation(self):
        # The fastmath option doesn't presently propagate to device functions
        # from their callers - arguably it should do, so this test is presently
        # an xfail.
        @cuda.jit("float32(float32, float32)", device=True)
        def foo(a, b):
            return a / b

        def bar(arr, val):
            i = cuda.grid(1)
            if i < arr.size:
                arr[i] = foo(i, val)

        sig = (float32[::1], float32)
        fastver = cuda.jit(sig, fastmath=True)(bar)
        precver = cuda.jit(sig)(bar)

        # Variants of the div instruction are further documented at:
        # https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div

        # The fast version should use the "fast, approximate divide" variant
        # NOTE(review): test_divf matches 'div.approx.ftz.f32 ' for fastmath
        # division; confirm 'div.approx.f32' is the intended substring here
        # before removing the expectedFailure marker.
        self.assertIn('div.approx.f32', fastver.inspect_asm(sig))
        # The precise version should use the "IEEE 754 compliant rounding"
        # variant, and neither of the "approximate divide" variants.
        self.assertIn('div.rn.f32', precver.inspect_asm(sig))
        self.assertNotIn('div.approx.f32', precver.inspect_asm(sig))
        self.assertNotIn('div.full.f32', precver.inspect_asm(sig))
|
241
|
+
|
242
|
+
|
243
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,52 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from numba import cuda
|
4
|
+
import unittest
|
5
|
+
from numba.cuda.testing import CUDATestCase
|
6
|
+
|
7
|
+
|
8
|
+
@cuda.jit
def foo(x):
    # Increment the element of ``x`` addressed by this thread's 1D grid
    # index; threads whose index is not below ``x.size`` do nothing.
    idx = cuda.grid(1)
    if idx < x.size:
        x[idx] += 1
|
13
|
+
|
14
|
+
|
15
|
+
class TestForAll(CUDATestCase):
    def test_forall_1(self):
        # One task per element: every element is incremented exactly once.
        arr = np.arange(11)
        orig = arr.copy()
        foo.forall(arr.size)(arr)
        np.testing.assert_array_almost_equal(arr, orig + 1)

    def test_forall_2(self):
        # forall on a kernel with an explicit signature and several
        # arguments: computes y = a * x + y element-wise.
        @cuda.jit("void(float32, float32[:], float32[:])")
        def bar(a, x, y):
            i = cuda.grid(1)
            if i < x.size:
                y[i] = a * x[i] + y[i]

        x = np.arange(13, dtype=np.float32)
        y = np.arange(13, dtype=np.float32)
        oldy = y.copy()
        a = 1.234
        bar.forall(y.size)(a, x, y)
        np.testing.assert_array_almost_equal(y, a * x + oldy, decimal=3)

    def test_forall_no_work(self):
        # Ensure that forall doesn't launch a kernel with no blocks when called
        # with 0 elements. See Issue #5017.
        arr = np.arange(11)
        orig = arr.copy()
        foo.forall(0)(arr)
        # With zero tasks no thread runs, so the input must be left untouched.
        np.testing.assert_array_equal(arr, orig)

    def test_forall_negative_work(self):
        # Ensure that forall doesn't allow the creation of a forall with a
        # negative element count.
        with self.assertRaises(ValueError) as raises:
            foo.forall(-1)
        self.assertIn("Can't create ForAll with negative task count",
                      str(raises.exception))
|
49
|
+
|
50
|
+
|
51
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from numba import cuda
|
4
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
5
|
+
|
6
|
+
|
7
|
+
class TestFreeVar(CUDATestCase):
    def test_freevar(self):
        """Compile and launch a kernel whose shared.array arguments are
        free variables captured from the enclosing test method.
        """
        from numba import float32

        # Both names below are referenced inside the kernel as free variables.
        nbtype = float32
        size = 1024

        @cuda.jit("(float32[::1], intp)")
        def foo(out, idx):
            "Dummy function"
            scratch = cuda.shared.array(size,   # size is freevar
                                        dtype=nbtype)  # nbtype is freevar
            out[idx] = scratch[idx]

        host = np.arange(2, dtype="float32")
        # The stored value is never inspected; the test passes if the kernel
        # compiles and the launch completes.
        foo[1, 1](host, 0)
|
26
|
+
|
27
|
+
|
28
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,66 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import math
|
3
|
+
from numba import cuda
|
4
|
+
from numba.types import float32, float64, int32, void
|
5
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
6
|
+
|
7
|
+
|
8
|
+
def simple_frexp(aryx, aryexp, arg):
    # Split ``arg`` with math.frexp and store the two results in the first
    # slots of ``aryx`` (mantissa) and ``aryexp`` (exponent).
    mantissa, exponent = math.frexp(arg)
    aryx[0] = mantissa
    aryexp[0] = exponent
|
10
|
+
|
11
|
+
|
12
|
+
def simple_ldexp(aryx, arg, exp):
    # Store ``arg * 2**exp`` (as computed by math.ldexp) in ``aryx[0]``.
    scaled = math.ldexp(arg, exp)
    aryx[0] = scaled
|
14
|
+
|
15
|
+
|
16
|
+
class TestCudaFrexpLdexp(CUDATestCase):
    def template_test_frexp(self, nptype, nbtype):
        """Check ``simple_frexp`` as a kernel for one floating-point type.

        ``nptype`` is the NumPy scalar type used for the host array and the
        expected values; ``nbtype`` is the Numba type used in the kernel
        signature.
        """
        compiled = cuda.jit(void(nbtype[:], int32[:], nbtype))(simple_frexp)
        arg = 3.1415
        aryx = np.zeros(1, dtype=nptype)
        aryexp = np.zeros(1, dtype=np.int32)
        compiled[1, 1](aryx, aryexp, arg)
        np.testing.assert_array_equal(aryx, nptype(0.785375))
        # Compare the single stored exponent, not the whole array, so the
        # assertion does not depend on the truth value of an ndarray.
        self.assertEqual(aryexp[0], 2)

        arg = np.inf
        compiled[1, 1](aryx, aryexp, arg)
        np.testing.assert_array_equal(aryx, nptype(np.inf))
        self.assertEqual(aryexp[0], 0)  # np.frexp gives -1

        arg = np.nan
        compiled[1, 1](aryx, aryexp, arg)
        np.testing.assert_array_equal(aryx, nptype(np.nan))
        self.assertEqual(aryexp[0], 0)  # np.frexp gives -1

    def template_test_ldexp(self, nptype, nbtype):
        """Check ``simple_ldexp`` as a kernel for one floating-point type.

        ``nptype`` is the NumPy scalar type used for the host array and the
        expected values; ``nbtype`` is the Numba type used in the kernel
        signature.
        """
        compiled = cuda.jit(void(nbtype[:], nbtype, int32))(simple_ldexp)
        arg = 0.785375
        exp = 2
        aryx = np.zeros(1, dtype=nptype)
        compiled[1, 1](aryx, arg, exp)
        np.testing.assert_array_equal(aryx, nptype(3.1415))

        arg = np.inf
        compiled[1, 1](aryx, arg, exp)
        np.testing.assert_array_equal(aryx, nptype(np.inf))

        arg = np.nan
        compiled[1, 1](aryx, arg, exp)
        np.testing.assert_array_equal(aryx, nptype(np.nan))

    def test_frexp_f4(self):
        self.template_test_frexp(np.float32, float32)

    def test_ldexp_f4(self):
        self.template_test_ldexp(np.float32, float32)

    def test_frexp_f8(self):
        self.template_test_frexp(np.float64, float64)

    def test_ldexp_f8(self):
        self.template_test_ldexp(np.float64, float64)
|
63
|
+
|
64
|
+
|
65
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,60 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda, int32, float32
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
|
5
|
+
# Module-level global read by simple_smem: the shared array length and the
# number of values thread 0 writes into it.
N = 100


def simple_smem(ary):
    # Thread 0 fills the shared array with 0..N-1; after the barrier every
    # thread copies the entry at its own grid index into ``ary``.
    shared = cuda.shared.array(N, int32)
    tid = cuda.grid(1)
    if tid == 0:
        for k in range(N):
            shared[k] = k
    cuda.syncthreads()
    ary[tid] = shared[tid]
|
16
|
+
|
17
|
+
|
18
|
+
# Module-level globals read by coop_smem2d: the two dimensions of its shared
# array.
S0 = 10
S1 = 20


def coop_smem2d(ary):
    # Each thread writes (row + 1) / (col + 1) into its own shared-array
    # slot, then copies that slot to ``ary`` after the barrier.
    row, col = cuda.grid(2)
    tile = cuda.shared.array((S0, S1), float32)
    tile[row, col] = (row + 1) / (col + 1)
    cuda.syncthreads()
    ary[row, col] = tile[row, col]
|
28
|
+
|
29
|
+
|
30
|
+
class TestCudaTestGlobal(CUDATestCase):
    def test_global_int_const(self):
        """Test simple_smem
        """
        compiled = cuda.jit("void(int32[:])")(simple_smem)

        nelem = 100
        ary = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary)

        self.assertTrue(np.all(ary == np.arange(nelem, dtype=np.int32)))

    # ``unittest.SkipTest`` is an exception class, not a decorator: applying
    # it to a method replaces the method with a non-callable exception
    # instance, so the test silently disappears from collection.
    # ``unittest.skip`` keeps the test disabled but reports it as skipped.
    @unittest.skip("previously disabled via a misapplied unittest.SkipTest "
                   "decorator; reason not recorded")
    def test_global_tuple_const(self):
        """Test coop_smem2d
        """
        compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)

        shape = 10, 20
        ary = np.empty(shape, dtype=np.float32)
        compiled[1, shape](ary)

        exp = np.empty_like(ary)
        for i in range(ary.shape[0]):
            for j in range(ary.shape[1]):
                exp[i, j] = float(i + 1) / (j + 1)
        self.assertTrue(np.allclose(ary, exp))
|
57
|
+
|
58
|
+
|
59
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|