numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -6,8 +6,12 @@ from numba import cuda, int64
|
|
6
6
|
from numba.cuda import compile_ptx
|
7
7
|
from numba.core.errors import TypingError
|
8
8
|
from numba.core.types import f2
|
9
|
-
from numba.cuda.testing import (
|
10
|
-
|
9
|
+
from numba.cuda.testing import (
|
10
|
+
unittest,
|
11
|
+
CUDATestCase,
|
12
|
+
skip_on_cudasim,
|
13
|
+
skip_unless_cc_53,
|
14
|
+
)
|
11
15
|
|
12
16
|
|
13
17
|
def simple_threadidx(ary):
|
@@ -260,7 +264,6 @@ def simple_hsqrt(r, x):
|
|
260
264
|
|
261
265
|
|
262
266
|
def simple_hrsqrt(r, x):
|
263
|
-
|
264
267
|
i = cuda.grid(1)
|
265
268
|
|
266
269
|
if i < len(r):
|
@@ -268,7 +271,7 @@ def simple_hrsqrt(r, x):
|
|
268
271
|
|
269
272
|
|
270
273
|
def numpy_hrsqrt(x, dtype):
|
271
|
-
return x
|
274
|
+
return x**-0.5
|
272
275
|
|
273
276
|
|
274
277
|
def simple_hceil(r, x):
|
@@ -404,15 +407,15 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
404
407
|
f_res = f_contigous()
|
405
408
|
self.assertTrue(np.all(c_res == f_res))
|
406
409
|
|
407
|
-
@skip_on_cudasim(
|
410
|
+
@skip_on_cudasim("Cudasim does not check types")
|
408
411
|
def test_nonliteral_grid_error(self):
|
409
|
-
with self.assertRaisesRegex(TypingError,
|
410
|
-
cuda.jit(
|
412
|
+
with self.assertRaisesRegex(TypingError, "RequireLiteralValue"):
|
413
|
+
cuda.jit("void(int32)")(nonliteral_grid)
|
411
414
|
|
412
|
-
@skip_on_cudasim(
|
415
|
+
@skip_on_cudasim("Cudasim does not check types")
|
413
416
|
def test_nonliteral_gridsize_error(self):
|
414
|
-
with self.assertRaisesRegex(TypingError,
|
415
|
-
cuda.jit(
|
417
|
+
with self.assertRaisesRegex(TypingError, "RequireLiteralValue"):
|
418
|
+
cuda.jit("void(int32)")(nonliteral_gridsize)
|
416
419
|
|
417
420
|
def test_simple_grid1d(self):
|
418
421
|
compiled = cuda.jit("void(int32[::1])")(simple_grid1d)
|
@@ -444,7 +447,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
444
447
|
compiled[nctaid, ntid](ary)
|
445
448
|
self.assertEqual(ary[0], nctaid * ntid)
|
446
449
|
|
447
|
-
@skip_on_cudasim(
|
450
|
+
@skip_on_cudasim("Requires too many threads")
|
448
451
|
def test_issue_9229(self):
|
449
452
|
# Ensure that grid and grid size are correct - #9229 showed that they
|
450
453
|
# overflowed an int32.
|
@@ -469,7 +472,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
469
472
|
self.assertEqual(grid_error[0], 0)
|
470
473
|
self.assertEqual(gridsize_error[0], 0)
|
471
474
|
|
472
|
-
@skip_on_cudasim(
|
475
|
+
@skip_on_cudasim("Tests PTX emission")
|
473
476
|
def test_selp(self):
|
474
477
|
sig = (int64[:], int64, int64[:])
|
475
478
|
cu_branching_with_ifs = cuda.jit(sig)(branching_with_ifs)
|
@@ -485,14 +488,14 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
485
488
|
a = np.arange(n, dtype=np.int64)
|
486
489
|
cu_branching_with_ifs[n, 1](a, b, c)
|
487
490
|
ptx = cu_branching_with_ifs.inspect_asm(sig)
|
488
|
-
self.assertEqual(2, len(re.findall(r
|
489
|
-
np.testing.assert_array_equal(a, expected, err_msg=
|
491
|
+
self.assertEqual(2, len(re.findall(r"\s+bra\s+", ptx)))
|
492
|
+
np.testing.assert_array_equal(a, expected, err_msg="branching")
|
490
493
|
|
491
494
|
a = np.arange(n, dtype=np.int64)
|
492
495
|
cu_branching_with_selps[n, 1](a, b, c)
|
493
496
|
ptx = cu_branching_with_selps.inspect_asm(sig)
|
494
|
-
self.assertEqual(0, len(re.findall(r
|
495
|
-
np.testing.assert_array_equal(a, expected, err_msg=
|
497
|
+
self.assertEqual(0, len(re.findall(r"\s+bra\s+", ptx)))
|
498
|
+
np.testing.assert_array_equal(a, expected, err_msg="selp")
|
496
499
|
|
497
500
|
def test_simple_gridsize2d(self):
|
498
501
|
compiled = cuda.jit("void(int32[::1])")(simple_gridsize2d)
|
@@ -528,10 +531,10 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
528
531
|
a, b, c = cuda.gridsize(3)
|
529
532
|
out[x, y, z] = a * b * c
|
530
533
|
|
531
|
-
arr = np.zeros(9
|
534
|
+
arr = np.zeros(9**3, dtype=np.int32).reshape(9, 9, 9)
|
532
535
|
foo[(3, 3, 3), (3, 3, 3)](arr)
|
533
536
|
|
534
|
-
np.testing.assert_equal(arr, 9
|
537
|
+
np.testing.assert_equal(arr, 9**3)
|
535
538
|
|
536
539
|
def test_3dgrid_2(self):
|
537
540
|
@cuda.jit
|
@@ -539,13 +542,15 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
539
542
|
x, y, z = cuda.grid(3)
|
540
543
|
a, b, c = cuda.gridsize(3)
|
541
544
|
grid_is_right = (
|
542
|
-
x == cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
|
543
|
-
y == cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y
|
544
|
-
z == cuda.threadIdx.z + cuda.blockIdx.z * cuda.blockDim.z
|
545
|
+
x == cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
|
546
|
+
and y == cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y
|
547
|
+
and z == cuda.threadIdx.z + cuda.blockIdx.z * cuda.blockDim.z
|
548
|
+
)
|
549
|
+
gridsize_is_right = (
|
550
|
+
a == cuda.blockDim.x * cuda.gridDim.x
|
551
|
+
and b == cuda.blockDim.y * cuda.gridDim.y
|
552
|
+
and c == cuda.blockDim.z * cuda.gridDim.z
|
545
553
|
)
|
546
|
-
gridsize_is_right = (a == cuda.blockDim.x * cuda.gridDim.x and
|
547
|
-
b == cuda.blockDim.y * cuda.gridDim.y and
|
548
|
-
c == cuda.blockDim.z * cuda.gridDim.z)
|
549
554
|
out[x, y, z] = grid_is_right and gridsize_is_right
|
550
555
|
|
551
556
|
x, y, z = (4 * 3, 3 * 2, 2 * 4)
|
@@ -605,21 +610,21 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
605
610
|
def test_fma_f4(self):
|
606
611
|
compiled = cuda.jit("void(f4[:], f4, f4, f4)")(simple_fma)
|
607
612
|
ary = np.zeros(1, dtype=np.float32)
|
608
|
-
compiled[1, 1](ary, 2
|
613
|
+
compiled[1, 1](ary, 2.0, 3.0, 4.0)
|
609
614
|
np.testing.assert_allclose(ary[0], 2 * 3 + 4)
|
610
615
|
|
611
616
|
def test_fma_f8(self):
|
612
617
|
compiled = cuda.jit("void(f8[:], f8, f8, f8)")(simple_fma)
|
613
618
|
ary = np.zeros(1, dtype=np.float64)
|
614
|
-
compiled[1, 1](ary, 2
|
619
|
+
compiled[1, 1](ary, 2.0, 3.0, 4.0)
|
615
620
|
np.testing.assert_allclose(ary[0], 2 * 3 + 4)
|
616
621
|
|
617
622
|
@skip_unless_cc_53
|
618
623
|
def test_hadd(self):
|
619
624
|
compiled = cuda.jit("void(f2[:], f2[:], f2[:])")(simple_hadd)
|
620
625
|
ary = np.zeros(1, dtype=np.float16)
|
621
|
-
arg1 = np.array([3.], dtype=np.float16)
|
622
|
-
arg2 = np.array([4.], dtype=np.float16)
|
626
|
+
arg1 = np.array([3.0], dtype=np.float16)
|
627
|
+
arg2 = np.array([4.0], dtype=np.float16)
|
623
628
|
compiled[1, 1](ary, arg1, arg2)
|
624
629
|
np.testing.assert_allclose(ary[0], arg1 + arg2)
|
625
630
|
|
@@ -628,24 +633,24 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
628
633
|
compiled = cuda.jit("void(f2[:], f2, f2)")(simple_hadd_scalar)
|
629
634
|
ary = np.zeros(1, dtype=np.float16)
|
630
635
|
arg1 = np.float16(3.1415926)
|
631
|
-
arg2 = np.float16(3.)
|
636
|
+
arg2 = np.float16(3.0)
|
632
637
|
compiled[1, 1](ary, arg1, arg2)
|
633
638
|
ref = arg1 + arg2
|
634
639
|
np.testing.assert_allclose(ary[0], ref)
|
635
640
|
|
636
|
-
@skip_on_cudasim(
|
641
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
637
642
|
def test_hadd_ptx(self):
|
638
643
|
args = (f2[:], f2, f2)
|
639
644
|
ptx, _ = compile_ptx(simple_hadd_scalar, args, cc=(5, 3))
|
640
|
-
self.assertIn(
|
645
|
+
self.assertIn("add.f16", ptx)
|
641
646
|
|
642
647
|
@skip_unless_cc_53
|
643
648
|
def test_hfma(self):
|
644
649
|
compiled = cuda.jit("void(f2[:], f2[:], f2[:], f2[:])")(simple_hfma)
|
645
650
|
ary = np.zeros(1, dtype=np.float16)
|
646
|
-
arg1 = np.array([2.], dtype=np.float16)
|
647
|
-
arg2 = np.array([3.], dtype=np.float16)
|
648
|
-
arg3 = np.array([4.], dtype=np.float16)
|
651
|
+
arg1 = np.array([2.0], dtype=np.float16)
|
652
|
+
arg2 = np.array([3.0], dtype=np.float16)
|
653
|
+
arg3 = np.array([4.0], dtype=np.float16)
|
649
654
|
compiled[1, 1](ary, arg1, arg2, arg3)
|
650
655
|
np.testing.assert_allclose(ary[0], arg1 * arg2 + arg3)
|
651
656
|
|
@@ -653,25 +658,25 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
653
658
|
def test_hfma_scalar(self):
|
654
659
|
compiled = cuda.jit("void(f2[:], f2, f2, f2)")(simple_hfma_scalar)
|
655
660
|
ary = np.zeros(1, dtype=np.float16)
|
656
|
-
arg1 = np.float16(2.)
|
657
|
-
arg2 = np.float16(3.)
|
658
|
-
arg3 = np.float16(4.)
|
661
|
+
arg1 = np.float16(2.0)
|
662
|
+
arg2 = np.float16(3.0)
|
663
|
+
arg3 = np.float16(4.0)
|
659
664
|
compiled[1, 1](ary, arg1, arg2, arg3)
|
660
665
|
ref = arg1 * arg2 + arg3
|
661
666
|
np.testing.assert_allclose(ary[0], ref)
|
662
667
|
|
663
|
-
@skip_on_cudasim(
|
668
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
664
669
|
def test_hfma_ptx(self):
|
665
670
|
args = (f2[:], f2, f2, f2)
|
666
671
|
ptx, _ = compile_ptx(simple_hfma_scalar, args, cc=(5, 3))
|
667
|
-
self.assertIn(
|
672
|
+
self.assertIn("fma.rn.f16", ptx)
|
668
673
|
|
669
674
|
@skip_unless_cc_53
|
670
675
|
def test_hsub(self):
|
671
676
|
compiled = cuda.jit("void(f2[:], f2[:], f2[:])")(simple_hsub)
|
672
677
|
ary = np.zeros(1, dtype=np.float16)
|
673
|
-
arg1 = np.array([3.], dtype=np.float16)
|
674
|
-
arg2 = np.array([4.], dtype=np.float16)
|
678
|
+
arg1 = np.array([3.0], dtype=np.float16)
|
679
|
+
arg2 = np.array([4.0], dtype=np.float16)
|
675
680
|
compiled[1, 1](ary, arg1, arg2)
|
676
681
|
np.testing.assert_allclose(ary[0], arg1 - arg2)
|
677
682
|
|
@@ -685,18 +690,18 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
685
690
|
ref = arg1 - arg2
|
686
691
|
np.testing.assert_allclose(ary[0], ref)
|
687
692
|
|
688
|
-
@skip_on_cudasim(
|
693
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
689
694
|
def test_hsub_ptx(self):
|
690
695
|
args = (f2[:], f2, f2)
|
691
696
|
ptx, _ = compile_ptx(simple_hsub_scalar, args, cc=(5, 3))
|
692
|
-
self.assertIn(
|
697
|
+
self.assertIn("sub.f16", ptx)
|
693
698
|
|
694
699
|
@skip_unless_cc_53
|
695
700
|
def test_hmul(self):
|
696
701
|
compiled = cuda.jit()(simple_hmul)
|
697
702
|
ary = np.zeros(1, dtype=np.float16)
|
698
|
-
arg1 = np.array([3.], dtype=np.float16)
|
699
|
-
arg2 = np.array([4.], dtype=np.float16)
|
703
|
+
arg1 = np.array([3.0], dtype=np.float16)
|
704
|
+
arg2 = np.array([4.0], dtype=np.float16)
|
700
705
|
compiled[1, 1](ary, arg1, arg2)
|
701
706
|
np.testing.assert_allclose(ary[0], arg1 * arg2)
|
702
707
|
|
@@ -710,11 +715,11 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
710
715
|
ref = arg1 * arg2
|
711
716
|
np.testing.assert_allclose(ary[0], ref)
|
712
717
|
|
713
|
-
@skip_on_cudasim(
|
718
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
714
719
|
def test_hmul_ptx(self):
|
715
720
|
args = (f2[:], f2, f2)
|
716
721
|
ptx, _ = compile_ptx(simple_hmul_scalar, args, cc=(5, 3))
|
717
|
-
self.assertIn(
|
722
|
+
self.assertIn("mul.f16", ptx)
|
718
723
|
|
719
724
|
@skip_unless_cc_53
|
720
725
|
def test_hdiv_scalar(self):
|
@@ -742,7 +747,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
742
747
|
def test_hneg(self):
|
743
748
|
compiled = cuda.jit("void(f2[:], f2[:])")(simple_hneg)
|
744
749
|
ary = np.zeros(1, dtype=np.float16)
|
745
|
-
arg1 = np.array([3.], dtype=np.float16)
|
750
|
+
arg1 = np.array([3.0], dtype=np.float16)
|
746
751
|
compiled[1, 1](ary, arg1)
|
747
752
|
np.testing.assert_allclose(ary[0], -arg1)
|
748
753
|
|
@@ -755,17 +760,17 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
755
760
|
ref = -arg1
|
756
761
|
np.testing.assert_allclose(ary[0], ref)
|
757
762
|
|
758
|
-
@skip_on_cudasim(
|
763
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
759
764
|
def test_hneg_ptx(self):
|
760
765
|
args = (f2[:], f2)
|
761
766
|
ptx, _ = compile_ptx(simple_hneg_scalar, args, cc=(5, 3))
|
762
|
-
self.assertIn(
|
767
|
+
self.assertIn("neg.f16", ptx)
|
763
768
|
|
764
769
|
@skip_unless_cc_53
|
765
770
|
def test_habs(self):
|
766
771
|
compiled = cuda.jit()(simple_habs)
|
767
772
|
ary = np.zeros(1, dtype=np.float16)
|
768
|
-
arg1 = np.array([-3.], dtype=np.float16)
|
773
|
+
arg1 = np.array([-3.0], dtype=np.float16)
|
769
774
|
compiled[1, 1](ary, arg1)
|
770
775
|
np.testing.assert_allclose(ary[0], abs(arg1))
|
771
776
|
|
@@ -778,25 +783,43 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
778
783
|
ref = abs(arg1)
|
779
784
|
np.testing.assert_allclose(ary[0], ref)
|
780
785
|
|
781
|
-
@skip_on_cudasim(
|
786
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
782
787
|
def test_habs_ptx(self):
|
783
788
|
args = (f2[:], f2)
|
784
789
|
ptx, _ = compile_ptx(simple_habs_scalar, args, cc=(5, 3))
|
785
|
-
self.assertIn(
|
790
|
+
self.assertIn("abs.f16", ptx)
|
786
791
|
|
787
792
|
@skip_unless_cc_53
|
788
793
|
def test_fp16_intrinsics_common(self):
|
789
|
-
kernels = (
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
+
kernels = (
|
795
|
+
simple_hsin,
|
796
|
+
simple_hcos,
|
797
|
+
simple_hlog,
|
798
|
+
simple_hlog2,
|
799
|
+
simple_hlog10,
|
800
|
+
simple_hsqrt,
|
801
|
+
simple_hceil,
|
802
|
+
simple_hfloor,
|
803
|
+
simple_hrcp,
|
804
|
+
simple_htrunc,
|
805
|
+
simple_hrint,
|
806
|
+
simple_hrsqrt,
|
807
|
+
)
|
794
808
|
exp_kernels = (simple_hexp, simple_hexp2)
|
795
|
-
expected_functions = (
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
809
|
+
expected_functions = (
|
810
|
+
np.sin,
|
811
|
+
np.cos,
|
812
|
+
np.log,
|
813
|
+
np.log2,
|
814
|
+
np.log10,
|
815
|
+
np.sqrt,
|
816
|
+
np.ceil,
|
817
|
+
np.floor,
|
818
|
+
np.reciprocal,
|
819
|
+
np.trunc,
|
820
|
+
np.rint,
|
821
|
+
numpy_hrsqrt,
|
822
|
+
)
|
800
823
|
expected_exp_functions = (np.exp, np.exp2)
|
801
824
|
|
802
825
|
# Generate random data
|
@@ -807,7 +830,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
807
830
|
for kernel, fn in zip(kernels, expected_functions):
|
808
831
|
with self.subTest(fn=fn):
|
809
832
|
kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
|
810
|
-
kernel[1,N](r, x)
|
833
|
+
kernel[1, N](r, x)
|
811
834
|
expected = fn(x, dtype=np.float16)
|
812
835
|
np.testing.assert_allclose(r, expected)
|
813
836
|
|
@@ -815,7 +838,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
815
838
|
for kernel, fn in zip(exp_kernels, expected_exp_functions):
|
816
839
|
with self.subTest(fn=fn):
|
817
840
|
kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
|
818
|
-
kernel[1,N](r, x2)
|
841
|
+
kernel[1, N](r, x2)
|
819
842
|
expected = fn(x2, dtype=np.float16)
|
820
843
|
np.testing.assert_allclose(r, expected)
|
821
844
|
|
@@ -836,14 +859,26 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
836
859
|
|
837
860
|
# Run the kernel
|
838
861
|
hexp10_vectors[1, N](r, x)
|
839
|
-
np.testing.assert_allclose(r, 10
|
862
|
+
np.testing.assert_allclose(r, 10**x)
|
840
863
|
|
841
864
|
@skip_unless_cc_53
|
842
865
|
def test_fp16_comparison(self):
|
843
|
-
fns = (
|
844
|
-
|
845
|
-
|
846
|
-
|
866
|
+
fns = (
|
867
|
+
simple_heq_scalar,
|
868
|
+
simple_hne_scalar,
|
869
|
+
simple_hge_scalar,
|
870
|
+
simple_hgt_scalar,
|
871
|
+
simple_hle_scalar,
|
872
|
+
simple_hlt_scalar,
|
873
|
+
)
|
874
|
+
ops = (
|
875
|
+
operator.eq,
|
876
|
+
operator.ne,
|
877
|
+
operator.ge,
|
878
|
+
operator.gt,
|
879
|
+
operator.le,
|
880
|
+
operator.lt,
|
881
|
+
)
|
847
882
|
|
848
883
|
for fn, op in zip(fns, ops):
|
849
884
|
with self.subTest(op=op):
|
@@ -872,18 +907,20 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
872
907
|
|
873
908
|
@skip_unless_cc_53
|
874
909
|
def test_multiple_float16_comparisons(self):
|
875
|
-
functions = (
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
910
|
+
functions = (
|
911
|
+
test_multiple_hcmp_1,
|
912
|
+
test_multiple_hcmp_2,
|
913
|
+
test_multiple_hcmp_3,
|
914
|
+
test_multiple_hcmp_4,
|
915
|
+
test_multiple_hcmp_5,
|
916
|
+
)
|
880
917
|
for fn in functions:
|
881
918
|
with self.subTest(fn=fn):
|
882
919
|
compiled = cuda.jit("void(b1[:], f2, f2, f2)")(fn)
|
883
920
|
ary = np.zeros(1, dtype=np.bool_)
|
884
|
-
arg1 = np.float16(2.)
|
885
|
-
arg2 = np.float16(3.)
|
886
|
-
arg3 = np.float16(4.)
|
921
|
+
arg1 = np.float16(2.0)
|
922
|
+
arg2 = np.float16(3.0)
|
923
|
+
arg3 = np.float16(4.0)
|
887
924
|
compiled[1, 1](ary, arg1, arg2, arg3)
|
888
925
|
self.assertTrue(ary[0])
|
889
926
|
|
@@ -891,11 +928,11 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
891
928
|
def test_hmax(self):
|
892
929
|
compiled = cuda.jit("void(f2[:], f2, f2)")(simple_hmax_scalar)
|
893
930
|
ary = np.zeros(1, dtype=np.float16)
|
894
|
-
arg1 = np.float16(3.)
|
895
|
-
arg2 = np.float16(4.)
|
931
|
+
arg1 = np.float16(3.0)
|
932
|
+
arg2 = np.float16(4.0)
|
896
933
|
compiled[1, 1](ary, arg1, arg2)
|
897
934
|
np.testing.assert_allclose(ary[0], arg2)
|
898
|
-
arg1 = np.float16(5.)
|
935
|
+
arg1 = np.float16(5.0)
|
899
936
|
compiled[1, 1](ary, arg1, arg2)
|
900
937
|
np.testing.assert_allclose(ary[0], arg1)
|
901
938
|
|
@@ -903,25 +940,25 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
903
940
|
def test_hmin(self):
|
904
941
|
compiled = cuda.jit("void(f2[:], f2, f2)")(simple_hmin_scalar)
|
905
942
|
ary = np.zeros(1, dtype=np.float16)
|
906
|
-
arg1 = np.float16(3.)
|
907
|
-
arg2 = np.float16(4.)
|
943
|
+
arg1 = np.float16(3.0)
|
944
|
+
arg2 = np.float16(4.0)
|
908
945
|
compiled[1, 1](ary, arg1, arg2)
|
909
946
|
np.testing.assert_allclose(ary[0], arg1)
|
910
|
-
arg1 = np.float16(5.)
|
947
|
+
arg1 = np.float16(5.0)
|
911
948
|
compiled[1, 1](ary, arg1, arg2)
|
912
949
|
np.testing.assert_allclose(ary[0], arg2)
|
913
950
|
|
914
951
|
def test_cbrt_f32(self):
|
915
952
|
compiled = cuda.jit("void(float32[:], float32)")(simple_cbrt)
|
916
953
|
ary = np.zeros(1, dtype=np.float32)
|
917
|
-
cbrt_arg = 2.
|
954
|
+
cbrt_arg = 2.0
|
918
955
|
compiled[1, 1](ary, cbrt_arg)
|
919
956
|
np.testing.assert_allclose(ary[0], cbrt_arg ** (1 / 3))
|
920
957
|
|
921
958
|
def test_cbrt_f64(self):
|
922
959
|
compiled = cuda.jit("void(float64[:], float64)")(simple_cbrt)
|
923
960
|
ary = np.zeros(1, dtype=np.float64)
|
924
|
-
cbrt_arg = 6.
|
961
|
+
cbrt_arg = 6.0
|
925
962
|
compiled[1, 1](ary, cbrt_arg)
|
926
963
|
np.testing.assert_allclose(ary[0], cbrt_arg ** (1 / 3))
|
927
964
|
|
@@ -1052,25 +1089,36 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
1052
1089
|
np.concatenate((vals, np.array([np.inf, -np.inf, np.nan])))
|
1053
1090
|
digits = (
|
1054
1091
|
# Common case branch of round_to_impl
|
1055
|
-
-5,
|
1092
|
+
-5,
|
1093
|
+
-4,
|
1094
|
+
-3,
|
1095
|
+
-2,
|
1096
|
+
-1,
|
1097
|
+
0,
|
1098
|
+
1,
|
1099
|
+
2,
|
1100
|
+
3,
|
1101
|
+
4,
|
1102
|
+
5,
|
1056
1103
|
# The algorithm currently implemented can only round to 13 digits
|
1057
1104
|
# with single precision. Note that this doesn't trigger the
|
1058
1105
|
# "overflow safe" branch of the implementation, which can only be
|
1059
1106
|
# hit when using double precision.
|
1060
|
-
13
|
1107
|
+
13,
|
1061
1108
|
)
|
1062
1109
|
for val, ndigits in itertools.product(vals, digits):
|
1063
1110
|
with self.subTest(val=val, ndigits=ndigits):
|
1064
1111
|
compiled[1, 1](ary, val, ndigits)
|
1065
|
-
self.assertPreciseEqual(
|
1066
|
-
|
1112
|
+
self.assertPreciseEqual(
|
1113
|
+
ary[0], round(val, ndigits), prec="single"
|
1114
|
+
)
|
1067
1115
|
|
1068
1116
|
# CPython on most platforms uses rounding based on dtoa.c, whereas the CUDA
|
1069
1117
|
# round-to implementation uses CPython's fallback implementation, which has
|
1070
1118
|
# slightly different behavior at the edges of the domain. Since the CUDA
|
1071
1119
|
# simulator executes using CPython, we need to skip this test when the
|
1072
1120
|
# simulator is active.
|
1073
|
-
@skip_on_cudasim(
|
1121
|
+
@skip_on_cudasim("Overflow behavior differs on CPython")
|
1074
1122
|
def test_round_to_f4_overflow(self):
|
1075
1123
|
# Test that the input value is returned when y in round_ndigits
|
1076
1124
|
# overflows.
|
@@ -1092,7 +1140,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
1092
1140
|
val = 0.3425
|
1093
1141
|
ndigits = 3
|
1094
1142
|
compiled[1, 1](ary, val, ndigits)
|
1095
|
-
self.assertPreciseEqual(ary[0], round(val, ndigits), prec=
|
1143
|
+
self.assertPreciseEqual(ary[0], round(val, ndigits), prec="single")
|
1096
1144
|
|
1097
1145
|
def test_round_to_f8(self):
|
1098
1146
|
compiled = cuda.jit("void(float64[:], float64, int32)")(simple_round_to)
|
@@ -1105,19 +1153,19 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
1105
1153
|
for val, ndigits in itertools.product(vals, digits):
|
1106
1154
|
with self.subTest(val=val, ndigits=ndigits):
|
1107
1155
|
compiled[1, 1](ary, val, ndigits)
|
1108
|
-
self.assertPreciseEqual(
|
1109
|
-
|
1156
|
+
self.assertPreciseEqual(
|
1157
|
+
ary[0], round(val, ndigits), prec="exact"
|
1158
|
+
)
|
1110
1159
|
|
1111
1160
|
# Trigger the "overflow safe" branch of the implementation
|
1112
1161
|
val = 0.12345678987654321 * 10e-15
|
1113
1162
|
ndigits = 23
|
1114
1163
|
with self.subTest(val=val, ndigits=ndigits):
|
1115
1164
|
compiled[1, 1](ary, val, ndigits)
|
1116
|
-
self.assertPreciseEqual(ary[0], round(val, ndigits),
|
1117
|
-
prec='double')
|
1165
|
+
self.assertPreciseEqual(ary[0], round(val, ndigits), prec="double")
|
1118
1166
|
|
1119
1167
|
# Skipped on cudasim for the same reasons as test_round_to_f4 above.
|
1120
|
-
@skip_on_cudasim(
|
1168
|
+
@skip_on_cudasim("Overflow behavior differs on CPython")
|
1121
1169
|
def test_round_to_f8_overflow(self):
|
1122
1170
|
# Test that the input value is returned when y in round_ndigits
|
1123
1171
|
# overflows.
|
@@ -1139,8 +1187,8 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
1139
1187
|
val = 0.5425
|
1140
1188
|
ndigits = 3
|
1141
1189
|
compiled[1, 1](ary, val, ndigits)
|
1142
|
-
self.assertPreciseEqual(ary[0], round(val, ndigits), prec=
|
1190
|
+
self.assertPreciseEqual(ary[0], round(val, ndigits), prec="double")
|
1143
1191
|
|
1144
1192
|
|
1145
|
-
if __name__ ==
|
1193
|
+
if __name__ == "__main__":
|
1146
1194
|
unittest.main()
|