numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,11 @@ import unittest
|
|
13
13
|
# Signatures to test with - these are all homogeneous in dtype, so the output
|
14
14
|
# dtype should match the input dtype - the output should not have been cast
|
15
15
|
# upwards, as reported in #8400: https://github.com/numba/numba/issues/8400
|
16
|
-
signatures = [
|
17
|
-
|
18
|
-
|
16
|
+
signatures = [
|
17
|
+
int32(int32, int32),
|
18
|
+
float32(float32, float32),
|
19
|
+
float64(float64, float64),
|
20
|
+
]
|
19
21
|
|
20
22
|
# The order here is chosen such that each subsequent dtype might have been
|
21
23
|
# casted to a previously-used dtype. This is unlikely to be an issue for CUDA,
|
@@ -25,16 +27,16 @@ signatures = [int32(int32, int32),
|
|
25
27
|
dtypes = (np.float64, np.float32, np.int32)
|
26
28
|
|
27
29
|
# NumPy ndarray orders
|
28
|
-
orders = (
|
30
|
+
orders = ("C", "F")
|
29
31
|
|
30
32
|
# Input sizes corresponding to operations:
|
31
33
|
# - Less than one warp,
|
32
34
|
# - Less than one block,
|
33
35
|
# - Greater than one block (i.e. many blocks)
|
34
|
-
input_sizes = (8, 100, 2
|
36
|
+
input_sizes = (8, 100, 2**10 + 1)
|
35
37
|
|
36
38
|
|
37
|
-
@skip_on_cudasim(
|
39
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
38
40
|
class TestCUDAVectorize(CUDATestCase):
|
39
41
|
# Presumably chosen as an odd number unlikely to coincide with the total
|
40
42
|
# thread count, and large enough to ensure a significant number of blocks
|
@@ -42,8 +44,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
42
44
|
N = 1000001
|
43
45
|
|
44
46
|
def test_scalar(self):
|
45
|
-
|
46
|
-
@vectorize(signatures, target='cuda')
|
47
|
+
@vectorize(signatures, target="cuda")
|
47
48
|
def vector_add(a, b):
|
48
49
|
return a + b
|
49
50
|
|
@@ -53,8 +54,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
53
54
|
self.assertEqual(c, a + b)
|
54
55
|
|
55
56
|
def test_1d(self):
|
56
|
-
|
57
|
-
@vectorize(signatures, target='cuda')
|
57
|
+
@vectorize(signatures, target="cuda")
|
58
58
|
def vector_add(a, b):
|
59
59
|
return a + b
|
60
60
|
|
@@ -66,8 +66,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
66
66
|
self.assertEqual(actual.dtype, ty)
|
67
67
|
|
68
68
|
def test_1d_async(self):
|
69
|
-
|
70
|
-
@vectorize(signatures, target='cuda')
|
69
|
+
@vectorize(signatures, target="cuda")
|
71
70
|
def vector_add(a, b):
|
72
71
|
return a + b
|
73
72
|
|
@@ -86,8 +85,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
86
85
|
self.assertEqual(actual.dtype, ty)
|
87
86
|
|
88
87
|
def test_nd(self):
|
89
|
-
|
90
|
-
@vectorize(signatures, target='cuda')
|
88
|
+
@vectorize(signatures, target="cuda")
|
91
89
|
def vector_add(a, b):
|
92
90
|
return a + b
|
93
91
|
|
@@ -102,7 +100,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
102
100
|
self.assertEqual(actual.dtype, dtype)
|
103
101
|
|
104
102
|
def test_output_arg(self):
|
105
|
-
@vectorize(signatures, target=
|
103
|
+
@vectorize(signatures, target="cuda")
|
106
104
|
def vector_add(a, b):
|
107
105
|
return a + b
|
108
106
|
|
@@ -117,7 +115,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
117
115
|
self.assertEqual(expected.dtype, actual.dtype)
|
118
116
|
|
119
117
|
def test_reduce(self):
|
120
|
-
@vectorize(signatures, target=
|
118
|
+
@vectorize(signatures, target="cuda")
|
121
119
|
def vector_add(a, b):
|
122
120
|
return a + b
|
123
121
|
|
@@ -135,8 +133,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
135
133
|
self.assertEqual(dtype, actual.dtype)
|
136
134
|
|
137
135
|
def test_reduce_async(self):
|
138
|
-
|
139
|
-
@vectorize(signatures, target='cuda')
|
136
|
+
@vectorize(signatures, target="cuda")
|
140
137
|
def vector_add(a, b):
|
141
138
|
return a + b
|
142
139
|
|
@@ -153,7 +150,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
153
150
|
self.assertEqual(dtype, actual.dtype)
|
154
151
|
|
155
152
|
def test_manual_transfer(self):
|
156
|
-
@vectorize(signatures, target=
|
153
|
+
@vectorize(signatures, target="cuda")
|
157
154
|
def vector_add(a, b):
|
158
155
|
return a + b
|
159
156
|
|
@@ -166,7 +163,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
166
163
|
self.assertEqual(expected.dtype, actual.dtype)
|
167
164
|
|
168
165
|
def test_ufunc_output_2d(self):
|
169
|
-
@vectorize(signatures, target=
|
166
|
+
@vectorize(signatures, target="cuda")
|
170
167
|
def vector_add(a, b):
|
171
168
|
return a + b
|
172
169
|
|
@@ -181,7 +178,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
181
178
|
self.assertEqual(expected.dtype, actual.dtype)
|
182
179
|
|
183
180
|
def check_tuple_arg(self, a, b):
|
184
|
-
@vectorize(signatures, target=
|
181
|
+
@vectorize(signatures, target="cuda")
|
185
182
|
def vector_add(a, b):
|
186
183
|
return a + b
|
187
184
|
|
@@ -194,7 +191,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
194
191
|
self.check_tuple_arg(a, b)
|
195
192
|
|
196
193
|
def test_namedtuple_arg(self):
|
197
|
-
Point = namedtuple(
|
194
|
+
Point = namedtuple("Point", ("x", "y", "z"))
|
198
195
|
a = Point(x=1.0, y=2.0, z=3.0)
|
199
196
|
b = Point(x=4.0, y=5.0, z=6.0)
|
200
197
|
self.check_tuple_arg(a, b)
|
@@ -206,7 +203,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
206
203
|
self.check_tuple_arg(a, b)
|
207
204
|
|
208
205
|
def test_tuple_of_namedtuple_arg(self):
|
209
|
-
Point = namedtuple(
|
206
|
+
Point = namedtuple("Point", ("x", "y", "z"))
|
210
207
|
a = (Point(x=1.0, y=2.0, z=3.0), Point(x=1.5, y=2.5, z=3.5))
|
211
208
|
b = (Point(x=4.0, y=5.0, z=6.0), Point(x=4.5, y=5.5, z=6.5))
|
212
209
|
self.check_tuple_arg(a, b)
|
@@ -216,17 +213,17 @@ class TestCUDAVectorize(CUDATestCase):
|
|
216
213
|
ys1 = xs1 + 2
|
217
214
|
xs2 = np.arange(10, dtype=np.int32) * 2
|
218
215
|
ys2 = xs2 + 1
|
219
|
-
Points = namedtuple(
|
216
|
+
Points = namedtuple("Points", ("xs", "ys"))
|
220
217
|
a = Points(xs=xs1, ys=ys1)
|
221
218
|
b = Points(xs=xs2, ys=ys2)
|
222
219
|
self.check_tuple_arg(a, b)
|
223
220
|
|
224
221
|
def test_name_attribute(self):
|
225
|
-
@vectorize(
|
222
|
+
@vectorize("f8(f8)", target="cuda")
|
226
223
|
def bar(x):
|
227
|
-
return x
|
224
|
+
return x**2
|
228
225
|
|
229
|
-
self.assertEqual(bar.__name__,
|
226
|
+
self.assertEqual(bar.__name__, "bar")
|
230
227
|
|
231
228
|
def test_no_transfer_for_device_data(self):
|
232
229
|
# Initialize test data on the device prior to banning host <-> device
|
@@ -238,15 +235,15 @@ class TestCUDAVectorize(CUDATestCase):
|
|
238
235
|
# A mock of a CUDA function that always raises a CudaAPIError
|
239
236
|
|
240
237
|
def raising_transfer(*args, **kwargs):
|
241
|
-
raise CudaAPIError(999,
|
238
|
+
raise CudaAPIError(999, "Transfer not allowed")
|
242
239
|
|
243
240
|
# Use the mock for transfers between the host and device
|
244
241
|
|
245
|
-
old_HtoD = getattr(driver,
|
246
|
-
old_DtoH = getattr(driver,
|
242
|
+
old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
|
243
|
+
old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
|
247
244
|
|
248
|
-
setattr(driver,
|
249
|
-
setattr(driver,
|
245
|
+
setattr(driver, "cuMemcpyHtoD", raising_transfer)
|
246
|
+
setattr(driver, "cuMemcpyDtoH", raising_transfer)
|
250
247
|
|
251
248
|
# Ensure that the mock functions are working as expected
|
252
249
|
|
@@ -260,7 +257,7 @@ class TestCUDAVectorize(CUDATestCase):
|
|
260
257
|
# Check that defining and calling a ufunc with data on the device
|
261
258
|
# induces no transfers
|
262
259
|
|
263
|
-
@vectorize([
|
260
|
+
@vectorize(["float32(float32)"], target="cuda")
|
264
261
|
def func(noise):
|
265
262
|
return noise + 1.0
|
266
263
|
|
@@ -270,14 +267,14 @@ class TestCUDAVectorize(CUDATestCase):
|
|
270
267
|
# no original implementation, simply remove ours.
|
271
268
|
|
272
269
|
if old_HtoD is not None:
|
273
|
-
setattr(driver,
|
270
|
+
setattr(driver, "cuMemcpyHtoD", old_HtoD)
|
274
271
|
else:
|
275
272
|
del driver.cuMemcpyHtoD
|
276
273
|
if old_DtoH is not None:
|
277
|
-
setattr(driver,
|
274
|
+
setattr(driver, "cuMemcpyDtoH", old_DtoH)
|
278
275
|
else:
|
279
276
|
del driver.cuMemcpyDtoH
|
280
277
|
|
281
278
|
|
282
|
-
if __name__ ==
|
279
|
+
if __name__ == "__main__":
|
283
280
|
unittest.main()
|
@@ -4,17 +4,17 @@ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
4
4
|
import unittest
|
5
5
|
|
6
6
|
|
7
|
-
@skip_on_cudasim(
|
7
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
8
8
|
class TestVectorizeComplex(CUDATestCase):
|
9
9
|
def test_vectorize_complex(self):
|
10
|
-
@vectorize([
|
10
|
+
@vectorize(["complex128(complex128)"], target="cuda")
|
11
11
|
def vcomp(a):
|
12
|
-
return a * a + 1.
|
12
|
+
return a * a + 1.0
|
13
13
|
|
14
14
|
A = np.arange(5, dtype=np.complex128)
|
15
15
|
B = vcomp(A)
|
16
|
-
self.assertTrue(np.allclose(A * A + 1
|
16
|
+
self.assertTrue(np.allclose(A * A + 1.0, B))
|
17
17
|
|
18
18
|
|
19
|
-
if __name__ ==
|
19
|
+
if __name__ == "__main__":
|
20
20
|
unittest.main()
|
@@ -1,21 +1,25 @@
|
|
1
1
|
import numpy as np
|
2
2
|
|
3
3
|
from numba import vectorize, cuda
|
4
|
-
from numba.tests.npyufunc.test_vectorize_decor import
|
5
|
-
|
4
|
+
from numba.tests.npyufunc.test_vectorize_decor import (
|
5
|
+
BaseVectorizeDecor,
|
6
|
+
BaseVectorizeNopythonArg,
|
7
|
+
BaseVectorizeUnrecognizedArg,
|
8
|
+
)
|
6
9
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
7
10
|
import unittest
|
8
11
|
|
9
12
|
|
10
|
-
@skip_on_cudasim(
|
13
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
11
14
|
class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
|
12
15
|
"""
|
13
16
|
Runs the tests from BaseVectorizeDecor with the CUDA target.
|
14
17
|
"""
|
15
|
-
target = 'cuda'
|
16
18
|
|
19
|
+
target = "cuda"
|
17
20
|
|
18
|
-
|
21
|
+
|
22
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
19
23
|
class TestGPUVectorizeBroadcast(CUDATestCase):
|
20
24
|
def test_broadcast(self):
|
21
25
|
a = np.random.randn(100, 3, 1)
|
@@ -24,7 +28,7 @@ class TestGPUVectorizeBroadcast(CUDATestCase):
|
|
24
28
|
def fn(a, b):
|
25
29
|
return a - b
|
26
30
|
|
27
|
-
@vectorize([
|
31
|
+
@vectorize(["float64(float64,float64)"], target="cuda")
|
28
32
|
def fngpu(a, b):
|
29
33
|
return a - b
|
30
34
|
|
@@ -43,7 +47,7 @@ class TestGPUVectorizeBroadcast(CUDATestCase):
|
|
43
47
|
def fn(a, b):
|
44
48
|
return a - b
|
45
49
|
|
46
|
-
@vectorize([
|
50
|
+
@vectorize(["float64(float64,float64)"], target="cuda")
|
47
51
|
def fngpu(a, b):
|
48
52
|
return a - b
|
49
53
|
|
@@ -52,18 +56,18 @@ class TestGPUVectorizeBroadcast(CUDATestCase):
|
|
52
56
|
np.testing.assert_almost_equal(expect, got.copy_to_host())
|
53
57
|
|
54
58
|
|
55
|
-
@skip_on_cudasim(
|
59
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
56
60
|
class TestVectorizeNopythonArg(BaseVectorizeNopythonArg, CUDATestCase):
|
57
61
|
def test_target_cuda_nopython(self):
|
58
62
|
warnings = ["nopython kwarg for cuda target is redundant"]
|
59
|
-
self._test_target_nopython(
|
63
|
+
self._test_target_nopython("cuda", warnings)
|
60
64
|
|
61
65
|
|
62
|
-
@skip_on_cudasim(
|
66
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
63
67
|
class TestVectorizeUnrecognizedArg(BaseVectorizeUnrecognizedArg, CUDATestCase):
|
64
68
|
def test_target_cuda_unrecognized_arg(self):
|
65
|
-
self._test_target_unrecognized_arg(
|
69
|
+
self._test_target_unrecognized_arg("cuda")
|
66
70
|
|
67
71
|
|
68
|
-
if __name__ ==
|
72
|
+
if __name__ == "__main__":
|
69
73
|
unittest.main()
|
@@ -5,19 +5,19 @@ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
5
5
|
import unittest
|
6
6
|
|
7
7
|
|
8
|
-
@skip_on_cudasim(
|
8
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
9
9
|
class TestCudaVectorizeDeviceCall(CUDATestCase):
|
10
10
|
def test_cuda_vectorize_device_call(self):
|
11
|
-
|
12
11
|
@cuda.jit(float32(float32, float32, float32), device=True)
|
13
12
|
def cu_device_fn(x, y, z):
|
14
|
-
return x
|
13
|
+
return x**y / z
|
15
14
|
|
16
15
|
def cu_ufunc(x, y, z):
|
17
16
|
return cu_device_fn(x, y, z)
|
18
17
|
|
19
|
-
ufunc = vectorize([float32(float32, float32, float32)], target=
|
20
|
-
cu_ufunc
|
18
|
+
ufunc = vectorize([float32(float32, float32, float32)], target="cuda")(
|
19
|
+
cu_ufunc
|
20
|
+
)
|
21
21
|
|
22
22
|
N = 100
|
23
23
|
|
@@ -27,10 +27,10 @@ class TestCudaVectorizeDeviceCall(CUDATestCase):
|
|
27
27
|
|
28
28
|
out = ufunc(X, Y, Z)
|
29
29
|
|
30
|
-
gold = (X
|
30
|
+
gold = (X**Y) / Z
|
31
31
|
|
32
32
|
self.assertTrue(np.allclose(out, gold))
|
33
33
|
|
34
34
|
|
35
|
-
if __name__ ==
|
35
|
+
if __name__ == "__main__":
|
36
36
|
unittest.main()
|
@@ -7,11 +7,10 @@ import unittest
|
|
7
7
|
sig = [float64(float64, float64)]
|
8
8
|
|
9
9
|
|
10
|
-
@skip_on_cudasim(
|
10
|
+
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
11
11
|
class TestCUDAVectorizeScalarArg(CUDATestCase):
|
12
|
-
|
13
12
|
def test_vectorize_scalar_arg(self):
|
14
|
-
@vectorize(sig, target=
|
13
|
+
@vectorize(sig, target="cuda")
|
15
14
|
def vector_add(a, b):
|
16
15
|
return a + b
|
17
16
|
|
@@ -20,11 +19,11 @@ class TestCUDAVectorizeScalarArg(CUDATestCase):
|
|
20
19
|
v = vector_add(1.0, dA)
|
21
20
|
|
22
21
|
np.testing.assert_array_almost_equal(
|
23
|
-
v.copy_to_host(),
|
24
|
-
|
22
|
+
v.copy_to_host(), np.arange(1, 11, dtype=np.float64)
|
23
|
+
)
|
25
24
|
|
26
25
|
def test_vectorize_all_scalars(self):
|
27
|
-
@vectorize(sig, target=
|
26
|
+
@vectorize(sig, target="cuda")
|
28
27
|
def vector_add(a, b):
|
29
28
|
return a + b
|
30
29
|
|
@@ -33,5 +32,5 @@ class TestCUDAVectorizeScalarArg(CUDATestCase):
|
|
33
32
|
np.testing.assert_almost_equal(2.0, v)
|
34
33
|
|
35
34
|
|
36
|
-
if __name__ ==
|
35
|
+
if __name__ == "__main__":
|
37
36
|
unittest.main()
|
@@ -7,27 +7,27 @@ from numba.core import config
|
|
7
7
|
import warnings
|
8
8
|
|
9
9
|
|
10
|
-
@skip_on_cudasim(
|
10
|
+
@skip_on_cudasim("cudasim does not raise performance warnings")
|
11
11
|
class TestWarnings(CUDATestCase):
|
12
12
|
def test_inefficient_launch_configuration(self):
|
13
13
|
@cuda.jit
|
14
14
|
def kernel():
|
15
15
|
pass
|
16
16
|
|
17
|
-
with override_config(
|
17
|
+
with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
|
18
18
|
with warnings.catch_warnings(record=True) as w:
|
19
19
|
kernel[1, 1]()
|
20
20
|
|
21
21
|
self.assertEqual(w[0].category, NumbaPerformanceWarning)
|
22
|
-
self.assertIn(
|
23
|
-
self.assertIn(
|
22
|
+
self.assertIn("Grid size", str(w[0].message))
|
23
|
+
self.assertIn("low occupancy", str(w[0].message))
|
24
24
|
|
25
25
|
def test_efficient_launch_configuration(self):
|
26
26
|
@cuda.jit
|
27
27
|
def kernel():
|
28
28
|
pass
|
29
29
|
|
30
|
-
with override_config(
|
30
|
+
with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
|
31
31
|
with warnings.catch_warnings(record=True) as w:
|
32
32
|
kernel[256, 256]()
|
33
33
|
|
@@ -40,14 +40,15 @@ class TestWarnings(CUDATestCase):
|
|
40
40
|
|
41
41
|
N = 10
|
42
42
|
arr_f32 = np.zeros(N, dtype=np.float32)
|
43
|
-
with override_config(
|
43
|
+
with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
|
44
44
|
with warnings.catch_warnings(record=True) as w:
|
45
45
|
foo[1, N](arr_f32, N)
|
46
46
|
|
47
47
|
self.assertEqual(w[0].category, NumbaPerformanceWarning)
|
48
|
-
self.assertIn(
|
49
|
-
|
50
|
-
|
48
|
+
self.assertIn(
|
49
|
+
"Host array used in CUDA kernel will incur", str(w[0].message)
|
50
|
+
)
|
51
|
+
self.assertIn("copy overhead", str(w[0].message))
|
51
52
|
|
52
53
|
def test_pinned_warn_on_host_array(self):
|
53
54
|
@cuda.jit
|
@@ -57,14 +58,15 @@ class TestWarnings(CUDATestCase):
|
|
57
58
|
N = 10
|
58
59
|
ary = cuda.pinned_array(N, dtype=np.float32)
|
59
60
|
|
60
|
-
with override_config(
|
61
|
+
with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
|
61
62
|
with warnings.catch_warnings(record=True) as w:
|
62
63
|
foo[1, N](ary, N)
|
63
64
|
|
64
65
|
self.assertEqual(w[0].category, NumbaPerformanceWarning)
|
65
|
-
self.assertIn(
|
66
|
-
|
67
|
-
|
66
|
+
self.assertIn(
|
67
|
+
"Host array used in CUDA kernel will incur", str(w[0].message)
|
68
|
+
)
|
69
|
+
self.assertIn("copy overhead", str(w[0].message))
|
68
70
|
|
69
71
|
def test_nowarn_on_mapped_array(self):
|
70
72
|
@cuda.jit
|
@@ -74,7 +76,7 @@ class TestWarnings(CUDATestCase):
|
|
74
76
|
N = 10
|
75
77
|
ary = cuda.mapped_array(N, dtype=np.float32)
|
76
78
|
|
77
|
-
with override_config(
|
79
|
+
with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
|
78
80
|
with warnings.catch_warnings(record=True) as w:
|
79
81
|
foo[1, N](ary, N)
|
80
82
|
|
@@ -89,7 +91,7 @@ class TestWarnings(CUDATestCase):
|
|
89
91
|
N = 10
|
90
92
|
ary = cuda.managed_array(N, dtype=np.float32)
|
91
93
|
|
92
|
-
with override_config(
|
94
|
+
with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
|
93
95
|
with warnings.catch_warnings(record=True) as w:
|
94
96
|
foo[1, N](ary, N)
|
95
97
|
|
@@ -103,7 +105,7 @@ class TestWarnings(CUDATestCase):
|
|
103
105
|
N = 10
|
104
106
|
ary = cuda.device_array(N, dtype=np.float32)
|
105
107
|
|
106
|
-
with override_config(
|
108
|
+
with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
|
107
109
|
with warnings.catch_warnings(record=True) as w:
|
108
110
|
foo[1, N](ary, N)
|
109
111
|
|
@@ -114,14 +116,14 @@ class TestWarnings(CUDATestCase):
|
|
114
116
|
cuda.jit(debug=True, opt=True)
|
115
117
|
|
116
118
|
self.assertEqual(len(w), 1)
|
117
|
-
self.assertIn(
|
119
|
+
self.assertIn("not supported by CUDA", str(w[0].message))
|
118
120
|
|
119
121
|
def test_warn_on_debug_and_opt_default(self):
|
120
122
|
with warnings.catch_warnings(record=True) as w:
|
121
123
|
cuda.jit(debug=True)
|
122
124
|
|
123
125
|
self.assertEqual(len(w), 1)
|
124
|
-
self.assertIn(
|
126
|
+
self.assertIn("not supported by CUDA", str(w[0].message))
|
125
127
|
|
126
128
|
def test_no_warn_on_debug_and_no_opt(self):
|
127
129
|
with warnings.catch_warnings(record=True) as w:
|
@@ -136,8 +138,8 @@ class TestWarnings(CUDATestCase):
|
|
136
138
|
self.assertEqual(len(w), 0)
|
137
139
|
|
138
140
|
def test_no_warn_on_debug_and_opt_with_config(self):
|
139
|
-
with override_config(
|
140
|
-
with override_config(
|
141
|
+
with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
|
142
|
+
with override_config("OPT", config._OptLevel(0)):
|
141
143
|
with warnings.catch_warnings(record=True) as w:
|
142
144
|
cuda.jit()
|
143
145
|
|
@@ -148,30 +150,30 @@ class TestWarnings(CUDATestCase):
|
|
148
150
|
|
149
151
|
self.assertEqual(len(w), 0)
|
150
152
|
|
151
|
-
with override_config(
|
153
|
+
with override_config("OPT", config._OptLevel(0)):
|
152
154
|
with warnings.catch_warnings(record=True) as w:
|
153
155
|
cuda.jit(debug=True)
|
154
156
|
|
155
157
|
self.assertEqual(len(w), 0)
|
156
158
|
|
157
159
|
def test_warn_on_debug_and_opt_with_config(self):
|
158
|
-
with override_config(
|
159
|
-
for opt in (1, 2, 3,
|
160
|
-
with override_config(
|
160
|
+
with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
|
161
|
+
for opt in (1, 2, 3, "max"):
|
162
|
+
with override_config("OPT", config._OptLevel(opt)):
|
161
163
|
with warnings.catch_warnings(record=True) as w:
|
162
164
|
cuda.jit()
|
163
165
|
|
164
166
|
self.assertEqual(len(w), 1)
|
165
|
-
self.assertIn(
|
167
|
+
self.assertIn("not supported by CUDA", str(w[0].message))
|
166
168
|
|
167
|
-
for opt in (1, 2, 3,
|
168
|
-
with override_config(
|
169
|
+
for opt in (1, 2, 3, "max"):
|
170
|
+
with override_config("OPT", config._OptLevel(opt)):
|
169
171
|
with warnings.catch_warnings(record=True) as w:
|
170
172
|
cuda.jit(debug=True)
|
171
173
|
|
172
174
|
self.assertEqual(len(w), 1)
|
173
|
-
self.assertIn(
|
175
|
+
self.assertIn("not supported by CUDA", str(w[0].message))
|
174
176
|
|
175
177
|
|
176
|
-
if __name__ ==
|
178
|
+
if __name__ == "__main__":
|
177
179
|
unittest.main()
|