numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -5,8 +5,7 @@ from numba.cuda.compiler import compile_ptx_for_current_device, compile_ptx
|
|
5
5
|
from math import cos, sin, tan, exp, log, log10, log2, pow, tanh
|
6
6
|
from operator import truediv
|
7
7
|
import numpy as np
|
8
|
-
from numba.cuda.testing import
|
9
|
-
skip_unless_cc_75)
|
8
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_75
|
10
9
|
import unittest
|
11
10
|
|
12
11
|
|
@@ -24,10 +23,9 @@ class FastMathCriterion:
|
|
24
23
|
test.assertTrue(all(i not in prec for i in self.prec_unexpected))
|
25
24
|
|
26
25
|
|
27
|
-
@skip_on_cudasim(
|
26
|
+
@skip_on_cudasim("Fastmath and PTX inspection not available on cudasim")
|
28
27
|
class TestFastMathOption(CUDATestCase):
|
29
28
|
def _test_fast_math_common(self, pyfunc, sig, device, criterion):
|
30
|
-
|
31
29
|
# Test jit code path
|
32
30
|
fastver = cuda.jit(sig, device=device, fastmath=True)(pyfunc)
|
33
31
|
precver = cuda.jit(sig, device=device)(pyfunc)
|
@@ -40,9 +38,7 @@ class TestFastMathOption(CUDATestCase):
|
|
40
38
|
fastptx, _ = compile_ptx_for_current_device(
|
41
39
|
pyfunc, sig, device=device, fastmath=True
|
42
40
|
)
|
43
|
-
precptx, _ = compile_ptx_for_current_device(
|
44
|
-
pyfunc, sig, device=device
|
45
|
-
)
|
41
|
+
precptx, _ = compile_ptx_for_current_device(pyfunc, sig, device=device)
|
46
42
|
|
47
43
|
criterion.check(self, fastptx, precptx)
|
48
44
|
|
@@ -69,7 +65,9 @@ class TestFastMathOption(CUDATestCase):
|
|
69
65
|
|
70
66
|
self._test_fast_math_common(
|
71
67
|
kernel,
|
72
|
-
(float32[::1], float32, float32),
|
68
|
+
(float32[::1], float32, float32),
|
69
|
+
device=False,
|
70
|
+
criterion=criterion,
|
73
71
|
)
|
74
72
|
self._test_fast_math_common(
|
75
73
|
device, (float32, float32), device=True, criterion=criterion
|
@@ -79,39 +77,41 @@ class TestFastMathOption(CUDATestCase):
|
|
79
77
|
self._test_fast_math_unary(
|
80
78
|
cos,
|
81
79
|
FastMathCriterion(
|
82
|
-
fast_expected=[
|
83
|
-
prec_unexpected=[
|
84
|
-
)
|
80
|
+
fast_expected=["cos.approx.ftz.f32 "],
|
81
|
+
prec_unexpected=["cos.approx.ftz.f32 "],
|
82
|
+
),
|
85
83
|
)
|
86
84
|
|
87
85
|
def test_sinf(self):
|
88
86
|
self._test_fast_math_unary(
|
89
87
|
sin,
|
90
88
|
FastMathCriterion(
|
91
|
-
fast_expected=[
|
92
|
-
prec_unexpected=[
|
93
|
-
)
|
89
|
+
fast_expected=["sin.approx.ftz.f32 "],
|
90
|
+
prec_unexpected=["sin.approx.ftz.f32 "],
|
91
|
+
),
|
94
92
|
)
|
95
93
|
|
96
94
|
def test_tanf(self):
|
97
95
|
self._test_fast_math_unary(
|
98
96
|
tan,
|
99
|
-
FastMathCriterion(
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
97
|
+
FastMathCriterion(
|
98
|
+
fast_expected=[
|
99
|
+
"sin.approx.ftz.f32 ",
|
100
|
+
"cos.approx.ftz.f32 ",
|
101
|
+
"div.approx.ftz.f32 ",
|
102
|
+
],
|
103
|
+
prec_unexpected=["sin.approx.ftz.f32 "],
|
104
|
+
),
|
104
105
|
)
|
105
106
|
|
106
107
|
@skip_unless_cc_75
|
107
108
|
def test_tanhf(self):
|
108
|
-
|
109
109
|
self._test_fast_math_unary(
|
110
110
|
tanh,
|
111
111
|
FastMathCriterion(
|
112
|
-
fast_expected=[
|
113
|
-
prec_unexpected=[
|
114
|
-
)
|
112
|
+
fast_expected=["tanh.approx.f32 "],
|
113
|
+
prec_unexpected=["tanh.approx.f32 "],
|
114
|
+
),
|
115
115
|
)
|
116
116
|
|
117
117
|
def test_tanhf_compile_ptx(self):
|
@@ -119,74 +119,85 @@ class TestFastMathOption(CUDATestCase):
|
|
119
119
|
r[0] = tanh(x)
|
120
120
|
|
121
121
|
def tanh_common_test(cc, criterion):
|
122
|
-
fastptx, _ = compile_ptx(
|
123
|
-
|
124
|
-
|
125
|
-
|
122
|
+
fastptx, _ = compile_ptx(
|
123
|
+
tanh_kernel, (float32[::1], float32), fastmath=True, cc=cc
|
124
|
+
)
|
125
|
+
precptx, _ = compile_ptx(
|
126
|
+
tanh_kernel, (float32[::1], float32), cc=cc
|
127
|
+
)
|
126
128
|
criterion.check(self, fastptx, precptx)
|
127
129
|
|
128
|
-
tanh_common_test(
|
129
|
-
|
130
|
-
|
131
|
-
|
130
|
+
tanh_common_test(
|
131
|
+
cc=(7, 5),
|
132
|
+
criterion=FastMathCriterion(
|
133
|
+
fast_expected=["tanh.approx.f32 "],
|
134
|
+
prec_unexpected=["tanh.approx.f32 "],
|
135
|
+
),
|
136
|
+
)
|
132
137
|
|
133
|
-
tanh_common_test(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
+
tanh_common_test(
|
139
|
+
cc=(7, 0),
|
140
|
+
criterion=FastMathCriterion(
|
141
|
+
fast_expected=["ex2.approx.ftz.f32 ", "rcp.approx.ftz.f32 "],
|
142
|
+
prec_unexpected=["tanh.approx.f32 "],
|
143
|
+
),
|
144
|
+
)
|
138
145
|
|
139
146
|
def test_expf(self):
|
140
147
|
self._test_fast_math_unary(
|
141
148
|
exp,
|
142
149
|
FastMathCriterion(
|
143
|
-
fast_unexpected=[
|
144
|
-
|
145
|
-
)
|
150
|
+
fast_unexpected=["fma.rn.f32 "], prec_expected=["fma.rn.f32 "]
|
151
|
+
),
|
146
152
|
)
|
147
153
|
|
148
154
|
def test_logf(self):
|
149
155
|
# Look for constant used to convert from log base 2 to log base e
|
150
156
|
self._test_fast_math_unary(
|
151
|
-
log,
|
152
|
-
|
153
|
-
|
154
|
-
|
157
|
+
log,
|
158
|
+
FastMathCriterion(
|
159
|
+
fast_expected=["lg2.approx.ftz.f32 ", "0f3F317218"],
|
160
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
161
|
+
),
|
155
162
|
)
|
156
163
|
|
157
164
|
def test_log10f(self):
|
158
165
|
# Look for constant used to convert from log base 2 to log base 10
|
159
166
|
self._test_fast_math_unary(
|
160
|
-
log10,
|
161
|
-
|
162
|
-
|
163
|
-
|
167
|
+
log10,
|
168
|
+
FastMathCriterion(
|
169
|
+
fast_expected=["lg2.approx.ftz.f32 ", "0f3E9A209B"],
|
170
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
171
|
+
),
|
164
172
|
)
|
165
173
|
|
166
174
|
def test_log2f(self):
|
167
175
|
self._test_fast_math_unary(
|
168
|
-
log2,
|
169
|
-
|
170
|
-
|
171
|
-
|
176
|
+
log2,
|
177
|
+
FastMathCriterion(
|
178
|
+
fast_expected=["lg2.approx.ftz.f32 "],
|
179
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
180
|
+
),
|
172
181
|
)
|
173
182
|
|
174
183
|
def test_powf(self):
|
175
184
|
self._test_fast_math_binary(
|
176
|
-
pow,
|
177
|
-
|
178
|
-
|
179
|
-
|
185
|
+
pow,
|
186
|
+
FastMathCriterion(
|
187
|
+
fast_expected=["lg2.approx.ftz.f32 "],
|
188
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
189
|
+
),
|
180
190
|
)
|
181
191
|
|
182
192
|
def test_divf(self):
|
183
193
|
self._test_fast_math_binary(
|
184
|
-
truediv,
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
194
|
+
truediv,
|
195
|
+
FastMathCriterion(
|
196
|
+
fast_expected=["div.approx.ftz.f32 "],
|
197
|
+
fast_unexpected=["div.rn.f32"],
|
198
|
+
prec_expected=["div.rn.f32"],
|
199
|
+
prec_unexpected=["div.approx.ftz.f32 "],
|
200
|
+
),
|
190
201
|
)
|
191
202
|
|
192
203
|
def test_divf_exception(self):
|
@@ -232,13 +243,13 @@ class TestFastMathOption(CUDATestCase):
|
|
232
243
|
# https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
|
233
244
|
|
234
245
|
# The fast version should use the "fast, approximate divide" variant
|
235
|
-
self.assertIn(
|
246
|
+
self.assertIn("div.approx.f32", fastver.inspect_asm(sig))
|
236
247
|
# The precise version should use the "IEEE 754 compliant rounding"
|
237
248
|
# variant, and neither of the "approximate divide" variants.
|
238
|
-
self.assertIn(
|
239
|
-
self.assertNotIn(
|
240
|
-
self.assertNotIn(
|
249
|
+
self.assertIn("div.rn.f32", precver.inspect_asm(sig))
|
250
|
+
self.assertNotIn("div.approx.f32", precver.inspect_asm(sig))
|
251
|
+
self.assertNotIn("div.full.f32", precver.inspect_asm(sig))
|
241
252
|
|
242
253
|
|
243
|
-
if __name__ ==
|
254
|
+
if __name__ == "__main__":
|
244
255
|
unittest.main()
|
@@ -44,9 +44,11 @@ class TestForAll(CUDATestCase):
|
|
44
44
|
# negative element count.
|
45
45
|
with self.assertRaises(ValueError) as raises:
|
46
46
|
foo.forall(-1)
|
47
|
-
self.assertIn(
|
48
|
-
|
47
|
+
self.assertIn(
|
48
|
+
"Can't create ForAll with negative task count",
|
49
|
+
str(raises.exception),
|
50
|
+
)
|
49
51
|
|
50
52
|
|
51
|
-
if __name__ ==
|
53
|
+
if __name__ == "__main__":
|
52
54
|
unittest.main()
|
@@ -17,13 +17,15 @@ class TestFreeVar(CUDATestCase):
|
|
17
17
|
@cuda.jit("(float32[::1], intp)")
|
18
18
|
def foo(A, i):
|
19
19
|
"Dummy function"
|
20
|
-
sdata = cuda.shared.array(
|
21
|
-
|
20
|
+
sdata = cuda.shared.array(
|
21
|
+
size, # size is freevar
|
22
|
+
dtype=nbtype,
|
23
|
+
) # nbtype is freevar
|
22
24
|
A[i] = sdata[i]
|
23
25
|
|
24
26
|
A = np.arange(2, dtype="float32")
|
25
27
|
foo[1, 1](A, 0)
|
26
28
|
|
27
29
|
|
28
|
-
if __name__ ==
|
30
|
+
if __name__ == "__main__":
|
29
31
|
unittest.main()
|
@@ -29,8 +29,7 @@ def coop_smem2d(ary):
|
|
29
29
|
|
30
30
|
class TestCudaTestGlobal(CUDATestCase):
|
31
31
|
def test_global_int_const(self):
|
32
|
-
"""Test simple_smem
|
33
|
-
"""
|
32
|
+
"""Test simple_smem"""
|
34
33
|
compiled = cuda.jit("void(int32[:])")(simple_smem)
|
35
34
|
|
36
35
|
nelem = 100
|
@@ -41,8 +40,7 @@ class TestCudaTestGlobal(CUDATestCase):
|
|
41
40
|
|
42
41
|
@unittest.SkipTest
|
43
42
|
def test_global_tuple_const(self):
|
44
|
-
"""Test coop_smem2d
|
45
|
-
"""
|
43
|
+
"""Test coop_smem2d"""
|
46
44
|
compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)
|
47
45
|
|
48
46
|
shape = 10, 20
|
@@ -56,5 +54,5 @@ class TestCudaTestGlobal(CUDATestCase):
|
|
56
54
|
self.assertTrue(np.allclose(ary, exp))
|
57
55
|
|
58
56
|
|
59
|
-
if __name__ ==
|
57
|
+
if __name__ == "__main__":
|
60
58
|
unittest.main()
|