numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -12,18 +12,17 @@ from numba.tests.enum_usecases import (
|
|
12
12
|
Shape,
|
13
13
|
Planet,
|
14
14
|
RequestError,
|
15
|
-
IntEnumWithNegatives
|
15
|
+
IntEnumWithNegatives,
|
16
16
|
)
|
17
17
|
|
18
18
|
|
19
19
|
class EnumTest(CUDATestCase):
|
20
|
-
|
21
20
|
pairs = [
|
22
21
|
(Color.red, Color.red),
|
23
22
|
(Color.red, Color.green),
|
24
23
|
(Planet.EARTH, Planet.EARTH),
|
25
24
|
(Planet.VENUS, Planet.MARS),
|
26
|
-
(Shape.circle, IntEnumWithNegatives.two)
|
25
|
+
(Shape.circle, IntEnumWithNegatives.two), # IntEnum, same value
|
27
26
|
]
|
28
27
|
|
29
28
|
def test_compare(self):
|
@@ -45,7 +44,7 @@ class EnumTest(CUDATestCase):
|
|
45
44
|
def f(out):
|
46
45
|
# Lookup of an enum member on its class
|
47
46
|
out[0] = Color.red == Color.green
|
48
|
-
out[1] = Color[
|
47
|
+
out[1] = Color["red"] == Color["green"]
|
49
48
|
|
50
49
|
cuda_f = cuda.jit(f)
|
51
50
|
got = np.zeros((2,), dtype=np.bool_)
|
@@ -106,16 +105,16 @@ class EnumTest(CUDATestCase):
|
|
106
105
|
def test_vectorize(self):
|
107
106
|
def f(x):
|
108
107
|
if x != RequestError.not_found:
|
109
|
-
return RequestError[
|
108
|
+
return RequestError["internal_error"]
|
110
109
|
else:
|
111
110
|
return RequestError.dummy
|
112
111
|
|
113
|
-
cuda_func = vectorize("int64(int64)", target=
|
112
|
+
cuda_func = vectorize("int64(int64)", target="cuda")(f)
|
114
113
|
arr = np.array([2, 404, 500, 404], dtype=np.int64)
|
115
114
|
expected = np.array([f(x) for x in arr], dtype=np.int64)
|
116
115
|
got = cuda_func(arr)
|
117
116
|
self.assertPreciseEqual(expected, got)
|
118
117
|
|
119
118
|
|
120
|
-
if __name__ ==
|
119
|
+
if __name__ == "__main__":
|
121
120
|
unittest.main()
|
@@ -17,34 +17,49 @@ class TestJitErrors(CUDATestCase):
|
|
17
17
|
|
18
18
|
with self.assertRaises(ValueError) as raises:
|
19
19
|
kernfunc[(1, 2, 3, 4), (5, 6)]
|
20
|
-
self.assertIn(
|
21
|
-
|
22
|
-
|
20
|
+
self.assertIn(
|
21
|
+
"griddim must be a sequence of 1, 2 or 3 integers, "
|
22
|
+
"got [1, 2, 3, 4]",
|
23
|
+
str(raises.exception),
|
24
|
+
)
|
23
25
|
|
24
26
|
with self.assertRaises(ValueError) as raises:
|
25
|
-
kernfunc[
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
kernfunc[
|
28
|
+
(
|
29
|
+
1,
|
30
|
+
2,
|
31
|
+
),
|
32
|
+
(3, 4, 5, 6),
|
33
|
+
]
|
34
|
+
self.assertIn(
|
35
|
+
"blockdim must be a sequence of 1, 2 or 3 integers, "
|
36
|
+
"got [3, 4, 5, 6]",
|
37
|
+
str(raises.exception),
|
38
|
+
)
|
29
39
|
|
30
40
|
def test_non_integral_dims(self):
|
31
41
|
kernfunc = cuda.jit(noop)
|
32
42
|
|
33
43
|
with self.assertRaises(TypeError) as raises:
|
34
44
|
kernfunc[2.0, 3]
|
35
|
-
self.assertIn(
|
36
|
-
|
45
|
+
self.assertIn(
|
46
|
+
"griddim must be a sequence of integers, got [2.0]",
|
47
|
+
str(raises.exception),
|
48
|
+
)
|
37
49
|
|
38
50
|
with self.assertRaises(TypeError) as raises:
|
39
51
|
kernfunc[2, 3.0]
|
40
|
-
self.assertIn(
|
41
|
-
|
52
|
+
self.assertIn(
|
53
|
+
"blockdim must be a sequence of integers, got [3.0]",
|
54
|
+
str(raises.exception),
|
55
|
+
)
|
42
56
|
|
43
57
|
def _test_unconfigured(self, kernfunc):
|
44
58
|
with self.assertRaises(ValueError) as raises:
|
45
59
|
kernfunc(0)
|
46
|
-
self.assertIn(
|
47
|
-
|
60
|
+
self.assertIn(
|
61
|
+
"launch configuration was not specified", str(raises.exception)
|
62
|
+
)
|
48
63
|
|
49
64
|
def test_unconfigured_typed_cudakernel(self):
|
50
65
|
kernfunc = cuda.jit("void(int32)")(noop)
|
@@ -54,7 +69,7 @@ class TestJitErrors(CUDATestCase):
|
|
54
69
|
kernfunc = cuda.jit(noop)
|
55
70
|
self._test_unconfigured(kernfunc)
|
56
71
|
|
57
|
-
@skip_on_cudasim(
|
72
|
+
@skip_on_cudasim("TypingError does not occur on simulator")
|
58
73
|
def test_typing_error(self):
|
59
74
|
# see #5860, this is present to catch changes to error reporting
|
60
75
|
# accidentally breaking the CUDA target
|
@@ -75,5 +90,5 @@ class TestJitErrors(CUDATestCase):
|
|
75
90
|
self.assertIn("NameError: name 'floor' is not defined", excstr)
|
76
91
|
|
77
92
|
|
78
|
-
if __name__ ==
|
93
|
+
if __name__ == "__main__":
|
79
94
|
unittest.main()
|
@@ -83,20 +83,19 @@ class TestException(CUDATestCase):
|
|
83
83
|
x[i] += x[i] // y[i]
|
84
84
|
|
85
85
|
n = 32
|
86
|
-
got_x = 1. / (np.arange(n) + 0.01)
|
87
|
-
got_y = 1. / (np.arange(n) + 0.01)
|
86
|
+
got_x = 1.0 / (np.arange(n) + 0.01)
|
87
|
+
got_y = 1.0 / (np.arange(n) + 0.01)
|
88
88
|
problematic[1, n](got_x, got_y)
|
89
89
|
|
90
|
-
expect_x = 1. / (np.arange(n) + 0.01)
|
91
|
-
expect_y = 1. / (np.arange(n) + 0.01)
|
90
|
+
expect_x = 1.0 / (np.arange(n) + 0.01)
|
91
|
+
expect_y = 1.0 / (np.arange(n) + 0.01)
|
92
92
|
oracle[1, n](expect_x, expect_y)
|
93
93
|
|
94
94
|
np.testing.assert_almost_equal(expect_x, got_x)
|
95
95
|
np.testing.assert_almost_equal(expect_y, got_y)
|
96
96
|
|
97
97
|
def test_raise_causing_warp_diverge(self):
|
98
|
-
"""Test case for issue #2655.
|
99
|
-
"""
|
98
|
+
"""Test case for issue #2655."""
|
100
99
|
self.case_raise_causing_warp_diverge(with_debug_mode=False)
|
101
100
|
|
102
101
|
# The following two cases relate to Issue #7806: Division by zero stops the
|
@@ -117,8 +116,8 @@ class TestException(CUDATestCase):
|
|
117
116
|
|
118
117
|
f[1, 1](r, x, y)
|
119
118
|
|
120
|
-
self.assertTrue(np.isinf(r[0]),
|
121
|
-
self.assertEqual(r[1], y[0],
|
119
|
+
self.assertTrue(np.isinf(r[0]), "Expected inf from div by zero")
|
120
|
+
self.assertEqual(r[1], y[0], "Expected execution to continue")
|
122
121
|
|
123
122
|
def test_zero_division_error_in_debug(self):
|
124
123
|
# When debug is True:
|
@@ -146,15 +145,15 @@ class TestException(CUDATestCase):
|
|
146
145
|
with self.assertRaises(exc):
|
147
146
|
f[1, 1](r, x, y)
|
148
147
|
|
149
|
-
self.assertEqual(r[0], 0,
|
150
|
-
self.assertEqual(r[1], 0,
|
148
|
+
self.assertEqual(r[0], 0, "Expected result to be left unset")
|
149
|
+
self.assertEqual(r[1], 0, "Expected execution to stop")
|
151
150
|
|
152
151
|
@xfail_unless_cudasim
|
153
152
|
def test_raise_in_device_function(self):
|
154
153
|
# This is an expected failure because reporting of exceptions raised in
|
155
154
|
# device functions does not work correctly - see Issue #8036:
|
156
155
|
# https://github.com/numba/numba/issues/8036
|
157
|
-
msg =
|
156
|
+
msg = "Device Function Error"
|
158
157
|
|
159
158
|
@cuda.jit(device=True)
|
160
159
|
def f():
|
@@ -170,5 +169,5 @@ class TestException(CUDATestCase):
|
|
170
169
|
self.assertIn(msg, str(raises.exception))
|
171
170
|
|
172
171
|
|
173
|
-
if __name__ ==
|
172
|
+
if __name__ == "__main__":
|
174
173
|
unittest.main()
|
@@ -8,12 +8,13 @@ class Interval:
|
|
8
8
|
"""
|
9
9
|
A half-open interval on the real number line.
|
10
10
|
"""
|
11
|
+
|
11
12
|
def __init__(self, lo, hi):
|
12
13
|
self.lo = lo
|
13
14
|
self.hi = hi
|
14
15
|
|
15
16
|
def __repr__(self):
|
16
|
-
return
|
17
|
+
return "Interval(%f, %f)" % (self.lo, self.hi)
|
17
18
|
|
18
19
|
@property
|
19
20
|
def width(self):
|
@@ -32,16 +33,21 @@ def sum_intervals(i, j):
|
|
32
33
|
|
33
34
|
if not config.ENABLE_CUDASIM:
|
34
35
|
from numba.core import cgutils
|
35
|
-
from numba.core.extending import (
|
36
|
-
|
37
|
-
|
36
|
+
from numba.core.extending import (
|
37
|
+
lower_builtin,
|
38
|
+
make_attribute_wrapper,
|
39
|
+
models,
|
40
|
+
register_model,
|
41
|
+
type_callable,
|
42
|
+
typeof_impl,
|
43
|
+
)
|
38
44
|
from numba.core.typing.templates import AttributeTemplate
|
39
45
|
from numba.cuda.cudadecl import registry as cuda_registry
|
40
46
|
from numba.cuda.cudaimpl import lower_attr as cuda_lower_attr
|
41
47
|
|
42
48
|
class IntervalType(types.Type):
|
43
49
|
def __init__(self):
|
44
|
-
super().__init__(name=
|
50
|
+
super().__init__(name="Interval")
|
45
51
|
|
46
52
|
interval_type = IntervalType()
|
47
53
|
|
@@ -54,19 +60,20 @@ if not config.ENABLE_CUDASIM:
|
|
54
60
|
def typer(lo, hi):
|
55
61
|
if isinstance(lo, types.Float) and isinstance(hi, types.Float):
|
56
62
|
return interval_type
|
63
|
+
|
57
64
|
return typer
|
58
65
|
|
59
66
|
@register_model(IntervalType)
|
60
67
|
class IntervalModel(models.StructModel):
|
61
68
|
def __init__(self, dmm, fe_type):
|
62
69
|
members = [
|
63
|
-
(
|
64
|
-
(
|
70
|
+
("lo", types.float64),
|
71
|
+
("hi", types.float64),
|
65
72
|
]
|
66
73
|
models.StructModel.__init__(self, dmm, fe_type, members)
|
67
74
|
|
68
|
-
make_attribute_wrapper(IntervalType,
|
69
|
-
make_attribute_wrapper(IntervalType,
|
75
|
+
make_attribute_wrapper(IntervalType, "lo", "lo")
|
76
|
+
make_attribute_wrapper(IntervalType, "hi", "hi")
|
70
77
|
|
71
78
|
@lower_builtin(Interval, types.Float, types.Float)
|
72
79
|
def impl_interval(context, builder, sig, args):
|
@@ -84,14 +91,14 @@ if not config.ENABLE_CUDASIM:
|
|
84
91
|
def resolve_width(self, mod):
|
85
92
|
return types.float64
|
86
93
|
|
87
|
-
@cuda_lower_attr(IntervalType,
|
94
|
+
@cuda_lower_attr(IntervalType, "width")
|
88
95
|
def cuda_Interval_width(context, builder, sig, arg):
|
89
96
|
lo = builder.extract_value(arg, 0)
|
90
97
|
hi = builder.extract_value(arg, 1)
|
91
98
|
return builder.fsub(hi, lo)
|
92
99
|
|
93
100
|
|
94
|
-
@skip_on_cudasim(
|
101
|
+
@skip_on_cudasim("Extensions not supported in the simulator")
|
95
102
|
class TestExtending(CUDATestCase):
|
96
103
|
def test_attributes(self):
|
97
104
|
@cuda.jit
|
@@ -151,5 +158,5 @@ class TestExtending(CUDATestCase):
|
|
151
158
|
np.testing.assert_allclose(r, expected)
|
152
159
|
|
153
160
|
|
154
|
-
if __name__ ==
|
161
|
+
if __name__ == "__main__":
|
155
162
|
unittest.main()
|
@@ -5,8 +5,7 @@ from numba.cuda.compiler import compile_ptx_for_current_device, compile_ptx
|
|
5
5
|
from math import cos, sin, tan, exp, log, log10, log2, pow, tanh
|
6
6
|
from operator import truediv
|
7
7
|
import numpy as np
|
8
|
-
from numba.cuda.testing import
|
9
|
-
skip_unless_cc_75)
|
8
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_75
|
10
9
|
import unittest
|
11
10
|
|
12
11
|
|
@@ -24,10 +23,9 @@ class FastMathCriterion:
|
|
24
23
|
test.assertTrue(all(i not in prec for i in self.prec_unexpected))
|
25
24
|
|
26
25
|
|
27
|
-
@skip_on_cudasim(
|
26
|
+
@skip_on_cudasim("Fastmath and PTX inspection not available on cudasim")
|
28
27
|
class TestFastMathOption(CUDATestCase):
|
29
28
|
def _test_fast_math_common(self, pyfunc, sig, device, criterion):
|
30
|
-
|
31
29
|
# Test jit code path
|
32
30
|
fastver = cuda.jit(sig, device=device, fastmath=True)(pyfunc)
|
33
31
|
precver = cuda.jit(sig, device=device)(pyfunc)
|
@@ -40,9 +38,7 @@ class TestFastMathOption(CUDATestCase):
|
|
40
38
|
fastptx, _ = compile_ptx_for_current_device(
|
41
39
|
pyfunc, sig, device=device, fastmath=True
|
42
40
|
)
|
43
|
-
precptx, _ = compile_ptx_for_current_device(
|
44
|
-
pyfunc, sig, device=device
|
45
|
-
)
|
41
|
+
precptx, _ = compile_ptx_for_current_device(pyfunc, sig, device=device)
|
46
42
|
|
47
43
|
criterion.check(self, fastptx, precptx)
|
48
44
|
|
@@ -69,7 +65,9 @@ class TestFastMathOption(CUDATestCase):
|
|
69
65
|
|
70
66
|
self._test_fast_math_common(
|
71
67
|
kernel,
|
72
|
-
(float32[::1], float32, float32),
|
68
|
+
(float32[::1], float32, float32),
|
69
|
+
device=False,
|
70
|
+
criterion=criterion,
|
73
71
|
)
|
74
72
|
self._test_fast_math_common(
|
75
73
|
device, (float32, float32), device=True, criterion=criterion
|
@@ -79,39 +77,41 @@ class TestFastMathOption(CUDATestCase):
|
|
79
77
|
self._test_fast_math_unary(
|
80
78
|
cos,
|
81
79
|
FastMathCriterion(
|
82
|
-
fast_expected=[
|
83
|
-
prec_unexpected=[
|
84
|
-
)
|
80
|
+
fast_expected=["cos.approx.ftz.f32 "],
|
81
|
+
prec_unexpected=["cos.approx.ftz.f32 "],
|
82
|
+
),
|
85
83
|
)
|
86
84
|
|
87
85
|
def test_sinf(self):
|
88
86
|
self._test_fast_math_unary(
|
89
87
|
sin,
|
90
88
|
FastMathCriterion(
|
91
|
-
fast_expected=[
|
92
|
-
prec_unexpected=[
|
93
|
-
)
|
89
|
+
fast_expected=["sin.approx.ftz.f32 "],
|
90
|
+
prec_unexpected=["sin.approx.ftz.f32 "],
|
91
|
+
),
|
94
92
|
)
|
95
93
|
|
96
94
|
def test_tanf(self):
|
97
95
|
self._test_fast_math_unary(
|
98
96
|
tan,
|
99
|
-
FastMathCriterion(
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
97
|
+
FastMathCriterion(
|
98
|
+
fast_expected=[
|
99
|
+
"sin.approx.ftz.f32 ",
|
100
|
+
"cos.approx.ftz.f32 ",
|
101
|
+
"div.approx.ftz.f32 ",
|
102
|
+
],
|
103
|
+
prec_unexpected=["sin.approx.ftz.f32 "],
|
104
|
+
),
|
104
105
|
)
|
105
106
|
|
106
107
|
@skip_unless_cc_75
|
107
108
|
def test_tanhf(self):
|
108
|
-
|
109
109
|
self._test_fast_math_unary(
|
110
110
|
tanh,
|
111
111
|
FastMathCriterion(
|
112
|
-
fast_expected=[
|
113
|
-
prec_unexpected=[
|
114
|
-
)
|
112
|
+
fast_expected=["tanh.approx.f32 "],
|
113
|
+
prec_unexpected=["tanh.approx.f32 "],
|
114
|
+
),
|
115
115
|
)
|
116
116
|
|
117
117
|
def test_tanhf_compile_ptx(self):
|
@@ -119,74 +119,85 @@ class TestFastMathOption(CUDATestCase):
|
|
119
119
|
r[0] = tanh(x)
|
120
120
|
|
121
121
|
def tanh_common_test(cc, criterion):
|
122
|
-
fastptx, _ = compile_ptx(
|
123
|
-
|
124
|
-
|
125
|
-
|
122
|
+
fastptx, _ = compile_ptx(
|
123
|
+
tanh_kernel, (float32[::1], float32), fastmath=True, cc=cc
|
124
|
+
)
|
125
|
+
precptx, _ = compile_ptx(
|
126
|
+
tanh_kernel, (float32[::1], float32), cc=cc
|
127
|
+
)
|
126
128
|
criterion.check(self, fastptx, precptx)
|
127
129
|
|
128
|
-
tanh_common_test(
|
129
|
-
|
130
|
-
|
131
|
-
|
130
|
+
tanh_common_test(
|
131
|
+
cc=(7, 5),
|
132
|
+
criterion=FastMathCriterion(
|
133
|
+
fast_expected=["tanh.approx.f32 "],
|
134
|
+
prec_unexpected=["tanh.approx.f32 "],
|
135
|
+
),
|
136
|
+
)
|
132
137
|
|
133
|
-
tanh_common_test(
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
+
tanh_common_test(
|
139
|
+
cc=(7, 0),
|
140
|
+
criterion=FastMathCriterion(
|
141
|
+
fast_expected=["ex2.approx.ftz.f32 ", "rcp.approx.ftz.f32 "],
|
142
|
+
prec_unexpected=["tanh.approx.f32 "],
|
143
|
+
),
|
144
|
+
)
|
138
145
|
|
139
146
|
def test_expf(self):
|
140
147
|
self._test_fast_math_unary(
|
141
148
|
exp,
|
142
149
|
FastMathCriterion(
|
143
|
-
fast_unexpected=[
|
144
|
-
|
145
|
-
)
|
150
|
+
fast_unexpected=["fma.rn.f32 "], prec_expected=["fma.rn.f32 "]
|
151
|
+
),
|
146
152
|
)
|
147
153
|
|
148
154
|
def test_logf(self):
|
149
155
|
# Look for constant used to convert from log base 2 to log base e
|
150
156
|
self._test_fast_math_unary(
|
151
|
-
log,
|
152
|
-
|
153
|
-
|
154
|
-
|
157
|
+
log,
|
158
|
+
FastMathCriterion(
|
159
|
+
fast_expected=["lg2.approx.ftz.f32 ", "0f3F317218"],
|
160
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
161
|
+
),
|
155
162
|
)
|
156
163
|
|
157
164
|
def test_log10f(self):
|
158
165
|
# Look for constant used to convert from log base 2 to log base 10
|
159
166
|
self._test_fast_math_unary(
|
160
|
-
log10,
|
161
|
-
|
162
|
-
|
163
|
-
|
167
|
+
log10,
|
168
|
+
FastMathCriterion(
|
169
|
+
fast_expected=["lg2.approx.ftz.f32 ", "0f3E9A209B"],
|
170
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
171
|
+
),
|
164
172
|
)
|
165
173
|
|
166
174
|
def test_log2f(self):
|
167
175
|
self._test_fast_math_unary(
|
168
|
-
log2,
|
169
|
-
|
170
|
-
|
171
|
-
|
176
|
+
log2,
|
177
|
+
FastMathCriterion(
|
178
|
+
fast_expected=["lg2.approx.ftz.f32 "],
|
179
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
180
|
+
),
|
172
181
|
)
|
173
182
|
|
174
183
|
def test_powf(self):
|
175
184
|
self._test_fast_math_binary(
|
176
|
-
pow,
|
177
|
-
|
178
|
-
|
179
|
-
|
185
|
+
pow,
|
186
|
+
FastMathCriterion(
|
187
|
+
fast_expected=["lg2.approx.ftz.f32 "],
|
188
|
+
prec_unexpected=["lg2.approx.ftz.f32 "],
|
189
|
+
),
|
180
190
|
)
|
181
191
|
|
182
192
|
def test_divf(self):
|
183
193
|
self._test_fast_math_binary(
|
184
|
-
truediv,
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
194
|
+
truediv,
|
195
|
+
FastMathCriterion(
|
196
|
+
fast_expected=["div.approx.ftz.f32 "],
|
197
|
+
fast_unexpected=["div.rn.f32"],
|
198
|
+
prec_expected=["div.rn.f32"],
|
199
|
+
prec_unexpected=["div.approx.ftz.f32 "],
|
200
|
+
),
|
190
201
|
)
|
191
202
|
|
192
203
|
def test_divf_exception(self):
|
@@ -232,13 +243,13 @@ class TestFastMathOption(CUDATestCase):
|
|
232
243
|
# https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
|
233
244
|
|
234
245
|
# The fast version should use the "fast, approximate divide" variant
|
235
|
-
self.assertIn(
|
246
|
+
self.assertIn("div.approx.f32", fastver.inspect_asm(sig))
|
236
247
|
# The precise version should use the "IEEE 754 compliant rounding"
|
237
248
|
# variant, and neither of the "approximate divide" variants.
|
238
|
-
self.assertIn(
|
239
|
-
self.assertNotIn(
|
240
|
-
self.assertNotIn(
|
249
|
+
self.assertIn("div.rn.f32", precver.inspect_asm(sig))
|
250
|
+
self.assertNotIn("div.approx.f32", precver.inspect_asm(sig))
|
251
|
+
self.assertNotIn("div.full.f32", precver.inspect_asm(sig))
|
241
252
|
|
242
253
|
|
243
|
-
if __name__ ==
|
254
|
+
if __name__ == "__main__":
|
244
255
|
unittest.main()
|
@@ -44,9 +44,11 @@ class TestForAll(CUDATestCase):
|
|
44
44
|
# negative element count.
|
45
45
|
with self.assertRaises(ValueError) as raises:
|
46
46
|
foo.forall(-1)
|
47
|
-
self.assertIn(
|
48
|
-
|
47
|
+
self.assertIn(
|
48
|
+
"Can't create ForAll with negative task count",
|
49
|
+
str(raises.exception),
|
50
|
+
)
|
49
51
|
|
50
52
|
|
51
|
-
if __name__ ==
|
53
|
+
if __name__ == "__main__":
|
52
54
|
unittest.main()
|
@@ -17,13 +17,15 @@ class TestFreeVar(CUDATestCase):
|
|
17
17
|
@cuda.jit("(float32[::1], intp)")
|
18
18
|
def foo(A, i):
|
19
19
|
"Dummy function"
|
20
|
-
sdata = cuda.shared.array(
|
21
|
-
|
20
|
+
sdata = cuda.shared.array(
|
21
|
+
size, # size is freevar
|
22
|
+
dtype=nbtype,
|
23
|
+
) # nbtype is freevar
|
22
24
|
A[i] = sdata[i]
|
23
25
|
|
24
26
|
A = np.arange(2, dtype="float32")
|
25
27
|
foo[1, 1](A, 0)
|
26
28
|
|
27
29
|
|
28
|
-
if __name__ ==
|
30
|
+
if __name__ == "__main__":
|
29
31
|
unittest.main()
|
@@ -29,8 +29,7 @@ def coop_smem2d(ary):
|
|
29
29
|
|
30
30
|
class TestCudaTestGlobal(CUDATestCase):
|
31
31
|
def test_global_int_const(self):
|
32
|
-
"""Test simple_smem
|
33
|
-
"""
|
32
|
+
"""Test simple_smem"""
|
34
33
|
compiled = cuda.jit("void(int32[:])")(simple_smem)
|
35
34
|
|
36
35
|
nelem = 100
|
@@ -41,8 +40,7 @@ class TestCudaTestGlobal(CUDATestCase):
|
|
41
40
|
|
42
41
|
@unittest.SkipTest
|
43
42
|
def test_global_tuple_const(self):
|
44
|
-
"""Test coop_smem2d
|
45
|
-
"""
|
43
|
+
"""Test coop_smem2d"""
|
46
44
|
compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)
|
47
45
|
|
48
46
|
shape = 10, 20
|
@@ -56,5 +54,5 @@ class TestCudaTestGlobal(CUDATestCase):
|
|
56
54
|
self.assertTrue(np.allclose(ary, exp))
|
57
55
|
|
58
56
|
|
59
|
-
if __name__ ==
|
57
|
+
if __name__ == "__main__":
|
60
58
|
unittest.main()
|