numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ regex_pattern = (
|
|
13
13
|
|
14
14
|
|
15
15
|
class TestUserExc(CUDATestCase):
|
16
|
-
|
17
16
|
def setUp(self):
|
18
17
|
super().setUp()
|
19
18
|
# LTO optimizes away the exception status due to an oversight
|
@@ -29,7 +28,7 @@ class TestUserExc(CUDATestCase):
|
|
29
28
|
elif x == 2:
|
30
29
|
raise MyError("foo")
|
31
30
|
|
32
|
-
test_exc[1, 1](0)
|
31
|
+
test_exc[1, 1](0) # no raise
|
33
32
|
with self.assertRaises(MyError) as cm:
|
34
33
|
test_exc[1, 1](1)
|
35
34
|
if not config.ENABLE_CUDASIM:
|
@@ -43,5 +42,5 @@ class TestUserExc(CUDATestCase):
|
|
43
42
|
self.assertIn("tid=[0, 0, 0] ctaid=[0, 0, 0]: foo", str(cm.exception))
|
44
43
|
|
45
44
|
|
46
|
-
if __name__ == '__main__':
|
45
|
+
if __name__ == "__main__":
|
47
46
|
unittest.main()
|
@@ -44,12 +44,7 @@ def make_kernel(vtype):
|
|
44
44
|
res[2] = v.z
|
45
45
|
|
46
46
|
def kernel_4elem(res):
|
47
|
-
v = vobj(
|
48
|
-
base_type(0),
|
49
|
-
base_type(1),
|
50
|
-
base_type(2),
|
51
|
-
base_type(3)
|
52
|
-
)
|
47
|
+
v = vobj(base_type(0), base_type(1), base_type(2), base_type(3))
|
53
48
|
res[0] = v.x
|
54
49
|
res[1] = v.y
|
55
50
|
res[2] = v.z
|
@@ -59,7 +54,7 @@ def make_kernel(vtype):
|
|
59
54
|
1: kernel_1elem,
|
60
55
|
2: kernel_2elem,
|
61
56
|
3: kernel_3elem,
|
62
|
-
4: kernel_4elem
|
57
|
+
4: kernel_4elem,
|
63
58
|
}[vtype.num_elements]
|
64
59
|
return cuda.jit(host_function)
|
65
60
|
|
@@ -83,13 +78,13 @@ def make_fancy_creation_kernel(vtype):
|
|
83
78
|
three = base_type(3.0)
|
84
79
|
four = base_type(4.0)
|
85
80
|
|
86
|
-
j = 0
|
81
|
+
j = 0 # index of the result array
|
87
82
|
|
88
83
|
# Construct a 1-component vector type, possible combination includes:
|
89
84
|
# 2C1 = 2 combinations.
|
90
85
|
|
91
86
|
f1_1 = v1(one) # 1
|
92
|
-
f1_2 = v1(f1_1)
|
87
|
+
f1_2 = v1(f1_1) # 1
|
93
88
|
|
94
89
|
res[0] = f1_1.x
|
95
90
|
res[1] = f1_2.x
|
@@ -98,11 +93,11 @@ def make_fancy_creation_kernel(vtype):
|
|
98
93
|
# Construct a 2-component vector type, possible combination includes:
|
99
94
|
# 1 + 2C1 * 2 = 5 combinations
|
100
95
|
|
101
|
-
f2_1 = v2(two, three)
|
102
|
-
f2_2 = v2(f1_1, three)
|
103
|
-
f2_3 = v2(two, f1_1)
|
104
|
-
f2_4 = v2(f1_1, f1_1)
|
105
|
-
f2_5 = v2(f2_1)
|
96
|
+
f2_1 = v2(two, three) # 2 3
|
97
|
+
f2_2 = v2(f1_1, three) # 1 3
|
98
|
+
f2_3 = v2(two, f1_1) # 2 1
|
99
|
+
f2_4 = v2(f1_1, f1_1) # 1 1
|
100
|
+
f2_5 = v2(f2_1) # 2 3
|
106
101
|
|
107
102
|
for v in (f2_1, f2_2, f2_3, f2_4, f2_5):
|
108
103
|
res[j] = v.x
|
@@ -112,24 +107,37 @@ def make_fancy_creation_kernel(vtype):
|
|
112
107
|
# Construct a 3-component vector type, possible combination includes:
|
113
108
|
# 1 + 2C1 * 2 + 2^3 = 13 combinations
|
114
109
|
|
115
|
-
f3_1 = v3(f2_1, one)
|
116
|
-
f3_2 = v3(f2_1, f1_1)
|
117
|
-
f3_3 = v3(one, f2_1)
|
118
|
-
f3_4 = v3(f1_1, f2_1)
|
119
|
-
|
120
|
-
f3_5 = v3(one, two, three)
|
121
|
-
f3_6 = v3(f1_1, two, three)
|
122
|
-
f3_7 = v3(one, f1_1, three)
|
123
|
-
f3_8 = v3(one, two, f1_1)
|
124
|
-
f3_9 = v3(f1_1, f1_1, three)
|
125
|
-
f3_10 = v3(one, f1_1, f1_1)
|
126
|
-
f3_11 = v3(f1_1, two, f1_1)
|
127
|
-
f3_12 = v3(f1_1, f1_1, f1_1)
|
128
|
-
|
129
|
-
f3_13 = v3(f3_1)
|
130
|
-
|
131
|
-
for v in (
|
132
|
-
|
110
|
+
f3_1 = v3(f2_1, one) # 2 3 1
|
111
|
+
f3_2 = v3(f2_1, f1_1) # 2 3 1
|
112
|
+
f3_3 = v3(one, f2_1) # 1 2 3
|
113
|
+
f3_4 = v3(f1_1, f2_1) # 1 2 3
|
114
|
+
|
115
|
+
f3_5 = v3(one, two, three) # 1 2 3
|
116
|
+
f3_6 = v3(f1_1, two, three) # 1 2 3
|
117
|
+
f3_7 = v3(one, f1_1, three) # 1 1 3
|
118
|
+
f3_8 = v3(one, two, f1_1) # 1 2 1
|
119
|
+
f3_9 = v3(f1_1, f1_1, three) # 1 1 3
|
120
|
+
f3_10 = v3(one, f1_1, f1_1) # 1 1 1
|
121
|
+
f3_11 = v3(f1_1, two, f1_1) # 1 2 1
|
122
|
+
f3_12 = v3(f1_1, f1_1, f1_1) # 1 1 1
|
123
|
+
|
124
|
+
f3_13 = v3(f3_1) # 2 3 1
|
125
|
+
|
126
|
+
for v in (
|
127
|
+
f3_1,
|
128
|
+
f3_2,
|
129
|
+
f3_3,
|
130
|
+
f3_4,
|
131
|
+
f3_5,
|
132
|
+
f3_6,
|
133
|
+
f3_7,
|
134
|
+
f3_8,
|
135
|
+
f3_9,
|
136
|
+
f3_10,
|
137
|
+
f3_11,
|
138
|
+
f3_12,
|
139
|
+
f3_13,
|
140
|
+
):
|
133
141
|
res[j] = v.x
|
134
142
|
res[j + 1] = v.y
|
135
143
|
res[j + 2] = v.z
|
@@ -138,48 +146,80 @@ def make_fancy_creation_kernel(vtype):
|
|
138
146
|
# Construct a 4-component vector type, possible combination includes:
|
139
147
|
# 1 + (2C1 * 2 + 1) + 3C1 * 2^2 + 2^4 = 34 combinations
|
140
148
|
|
141
|
-
f4_1 = v4(one, two, three, four)
|
142
|
-
f4_2 = v4(f1_1, two, three, four)
|
143
|
-
f4_3 = v4(one, f1_1, three, four)
|
144
|
-
f4_4 = v4(one, two, f1_1, four)
|
145
|
-
f4_5 = v4(one, two, three, f1_1)
|
149
|
+
f4_1 = v4(one, two, three, four) # 1 2 3 4
|
150
|
+
f4_2 = v4(f1_1, two, three, four) # 1 2 3 4
|
151
|
+
f4_3 = v4(one, f1_1, three, four) # 1 1 3 4
|
152
|
+
f4_4 = v4(one, two, f1_1, four) # 1 2 1 4
|
153
|
+
f4_5 = v4(one, two, three, f1_1) # 1 2 3 1
|
146
154
|
f4_6 = v4(f1_1, f1_1, three, four) # 1 1 3 4
|
147
|
-
f4_7 = v4(f1_1, two, f1_1, four)
|
148
|
-
f4_8 = v4(f1_1, two, three, f1_1)
|
149
|
-
f4_9 = v4(one, f1_1, f1_1, four)
|
155
|
+
f4_7 = v4(f1_1, two, f1_1, four) # 1 2 1 4
|
156
|
+
f4_8 = v4(f1_1, two, three, f1_1) # 1 2 3 1
|
157
|
+
f4_9 = v4(one, f1_1, f1_1, four) # 1 1 1 4
|
150
158
|
f4_10 = v4(one, f1_1, three, f1_1) # 1 1 3 1
|
151
|
-
f4_11 = v4(one, two, f1_1, f1_1)
|
159
|
+
f4_11 = v4(one, two, f1_1, f1_1) # 1 2 1 1
|
152
160
|
f4_12 = v4(f1_1, f1_1, f1_1, four) # 1 1 1 4
|
153
|
-
f4_13 = v4(f1_1, f1_1, three, f1_1)
|
154
|
-
f4_14 = v4(f1_1, two, f1_1, f1_1)
|
155
|
-
f4_15 = v4(one, f1_1, f1_1, f1_1)
|
161
|
+
f4_13 = v4(f1_1, f1_1, three, f1_1) # 1 1 3 1
|
162
|
+
f4_14 = v4(f1_1, two, f1_1, f1_1) # 1 2 1 1
|
163
|
+
f4_15 = v4(one, f1_1, f1_1, f1_1) # 1 1 1 1
|
156
164
|
f4_16 = v4(f1_1, f1_1, f1_1, f1_1) # 1 1 1 1
|
157
165
|
|
158
|
-
f4_17 = v4(f2_1, two, three)
|
159
|
-
f4_18 = v4(f2_1, f1_1, three)
|
160
|
-
f4_19 = v4(f2_1, two, f1_1)
|
161
|
-
f4_20 = v4(f2_1, f1_1, f1_1)
|
162
|
-
f4_21 = v4(one, f2_1, three)
|
163
|
-
f4_22 = v4(f1_1, f2_1, three)
|
164
|
-
f4_23 = v4(one, f2_1, f1_1)
|
165
|
-
f4_24 = v4(f1_1, f2_1, f1_1)
|
166
|
-
f4_25 = v4(one, four, f2_1)
|
167
|
-
f4_26 = v4(f1_1, four, f2_1)
|
168
|
-
f4_27 = v4(one, f1_1, f2_1)
|
169
|
-
f4_28 = v4(f1_1, f1_1, f2_1)
|
170
|
-
|
171
|
-
f4_29 = v4(f2_1, f2_1)
|
172
|
-
f4_30 = v4(f3_1, four)
|
173
|
-
f4_31 = v4(f3_1, f1_1)
|
174
|
-
f4_32 = v4(four, f3_1)
|
175
|
-
f4_33 = v4(f1_1, f3_1)
|
176
|
-
|
177
|
-
f4_34 = v4(f4_1)
|
178
|
-
|
179
|
-
for v in (
|
180
|
-
|
181
|
-
|
182
|
-
|
166
|
+
f4_17 = v4(f2_1, two, three) # 2 3 2 3
|
167
|
+
f4_18 = v4(f2_1, f1_1, three) # 2 3 1 3
|
168
|
+
f4_19 = v4(f2_1, two, f1_1) # 2 3 2 1
|
169
|
+
f4_20 = v4(f2_1, f1_1, f1_1) # 2 3 1 1
|
170
|
+
f4_21 = v4(one, f2_1, three) # 1 2 3 3
|
171
|
+
f4_22 = v4(f1_1, f2_1, three) # 1 2 3 3
|
172
|
+
f4_23 = v4(one, f2_1, f1_1) # 1 2 3 1
|
173
|
+
f4_24 = v4(f1_1, f2_1, f1_1) # 1 2 3 1
|
174
|
+
f4_25 = v4(one, four, f2_1) # 1 4 2 3
|
175
|
+
f4_26 = v4(f1_1, four, f2_1) # 1 4 2 3
|
176
|
+
f4_27 = v4(one, f1_1, f2_1) # 1 1 2 3
|
177
|
+
f4_28 = v4(f1_1, f1_1, f2_1) # 1 1 2 3
|
178
|
+
|
179
|
+
f4_29 = v4(f2_1, f2_1) # 2 3 2 3
|
180
|
+
f4_30 = v4(f3_1, four) # 2 3 1 4
|
181
|
+
f4_31 = v4(f3_1, f1_1) # 2 3 1 1
|
182
|
+
f4_32 = v4(four, f3_1) # 4 2 3 1
|
183
|
+
f4_33 = v4(f1_1, f3_1) # 1 2 3 1
|
184
|
+
|
185
|
+
f4_34 = v4(f4_1) # 1 2 3 4
|
186
|
+
|
187
|
+
for v in (
|
188
|
+
f4_1,
|
189
|
+
f4_2,
|
190
|
+
f4_3,
|
191
|
+
f4_4,
|
192
|
+
f4_5,
|
193
|
+
f4_6,
|
194
|
+
f4_7,
|
195
|
+
f4_8,
|
196
|
+
f4_9,
|
197
|
+
f4_10,
|
198
|
+
f4_11,
|
199
|
+
f4_12,
|
200
|
+
f4_13,
|
201
|
+
f4_14,
|
202
|
+
f4_15,
|
203
|
+
f4_16,
|
204
|
+
f4_17,
|
205
|
+
f4_18,
|
206
|
+
f4_19,
|
207
|
+
f4_20,
|
208
|
+
f4_21,
|
209
|
+
f4_22,
|
210
|
+
f4_23,
|
211
|
+
f4_24,
|
212
|
+
f4_25,
|
213
|
+
f4_26,
|
214
|
+
f4_27,
|
215
|
+
f4_28,
|
216
|
+
f4_29,
|
217
|
+
f4_30,
|
218
|
+
f4_31,
|
219
|
+
f4_32,
|
220
|
+
f4_33,
|
221
|
+
f4_34,
|
222
|
+
):
|
183
223
|
res[j] = v.x
|
184
224
|
res[j + 1] = v.y
|
185
225
|
res[j + 2] = v.z
|
@@ -190,13 +230,13 @@ def make_fancy_creation_kernel(vtype):
|
|
190
230
|
|
191
231
|
|
192
232
|
class TestCudaVectorType(CUDATestCase):
|
193
|
-
|
194
233
|
def test_basic(self):
|
195
234
|
"""Basic test that makes sure that vector type and aliases
|
196
235
|
are available within the cuda module from both device and
|
197
236
|
simulator mode. This is an important sanity check, since other
|
198
237
|
tests below tests the vector type objects programmatically.
|
199
238
|
"""
|
239
|
+
|
200
240
|
@cuda.jit("void(float64[:])")
|
201
241
|
def kernel(arr):
|
202
242
|
v1 = cuda.float64x4(1.0, 3.0, 5.0, 7.0)
|
@@ -227,66 +267,201 @@ class TestCudaVectorType(CUDATestCase):
|
|
227
267
|
with self.subTest(vty=vty):
|
228
268
|
kernel = make_fancy_creation_kernel(vty)
|
229
269
|
|
230
|
-
expected = np.array(
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
270
|
+
expected = np.array(
|
271
|
+
[
|
272
|
+
# 1-component vectors
|
273
|
+
1,
|
274
|
+
1,
|
275
|
+
# 2-component vectors
|
276
|
+
2,
|
277
|
+
3,
|
278
|
+
1,
|
279
|
+
3,
|
280
|
+
2,
|
281
|
+
1,
|
282
|
+
1,
|
283
|
+
1,
|
284
|
+
2,
|
285
|
+
3,
|
286
|
+
# 3-component vectors
|
287
|
+
2,
|
288
|
+
3,
|
289
|
+
1,
|
290
|
+
2,
|
291
|
+
3,
|
292
|
+
1,
|
293
|
+
1,
|
294
|
+
2,
|
295
|
+
3,
|
296
|
+
1,
|
297
|
+
2,
|
298
|
+
3,
|
299
|
+
1,
|
300
|
+
2,
|
301
|
+
3,
|
302
|
+
1,
|
303
|
+
2,
|
304
|
+
3,
|
305
|
+
1,
|
306
|
+
1,
|
307
|
+
3,
|
308
|
+
1,
|
309
|
+
2,
|
310
|
+
1,
|
311
|
+
1,
|
312
|
+
1,
|
313
|
+
3,
|
314
|
+
1,
|
315
|
+
1,
|
316
|
+
1,
|
317
|
+
1,
|
318
|
+
2,
|
319
|
+
1,
|
320
|
+
1,
|
321
|
+
1,
|
322
|
+
1,
|
323
|
+
2,
|
324
|
+
3,
|
325
|
+
1,
|
326
|
+
# 4-component vectors
|
327
|
+
1,
|
328
|
+
2,
|
329
|
+
3,
|
330
|
+
4,
|
331
|
+
1,
|
332
|
+
2,
|
333
|
+
3,
|
334
|
+
4,
|
335
|
+
1,
|
336
|
+
1,
|
337
|
+
3,
|
338
|
+
4,
|
339
|
+
1,
|
340
|
+
2,
|
341
|
+
1,
|
342
|
+
4,
|
343
|
+
1,
|
344
|
+
2,
|
345
|
+
3,
|
346
|
+
1,
|
347
|
+
1,
|
348
|
+
1,
|
349
|
+
3,
|
350
|
+
4,
|
351
|
+
1,
|
352
|
+
2,
|
353
|
+
1,
|
354
|
+
4,
|
355
|
+
1,
|
356
|
+
2,
|
357
|
+
3,
|
358
|
+
1,
|
359
|
+
1,
|
360
|
+
1,
|
361
|
+
1,
|
362
|
+
4,
|
363
|
+
1,
|
364
|
+
1,
|
365
|
+
3,
|
366
|
+
1,
|
367
|
+
1,
|
368
|
+
2,
|
369
|
+
1,
|
370
|
+
1,
|
371
|
+
1,
|
372
|
+
1,
|
373
|
+
1,
|
374
|
+
4,
|
375
|
+
1,
|
376
|
+
1,
|
377
|
+
3,
|
378
|
+
1,
|
379
|
+
1,
|
380
|
+
2,
|
381
|
+
1,
|
382
|
+
1,
|
383
|
+
1,
|
384
|
+
1,
|
385
|
+
1,
|
386
|
+
1,
|
387
|
+
1,
|
388
|
+
1,
|
389
|
+
1,
|
390
|
+
1,
|
391
|
+
2,
|
392
|
+
3,
|
393
|
+
2,
|
394
|
+
3,
|
395
|
+
2,
|
396
|
+
3,
|
397
|
+
1,
|
398
|
+
3,
|
399
|
+
2,
|
400
|
+
3,
|
401
|
+
2,
|
402
|
+
1,
|
403
|
+
2,
|
404
|
+
3,
|
405
|
+
1,
|
406
|
+
1,
|
407
|
+
1,
|
408
|
+
2,
|
409
|
+
3,
|
410
|
+
3,
|
411
|
+
1,
|
412
|
+
2,
|
413
|
+
3,
|
414
|
+
3,
|
415
|
+
1,
|
416
|
+
2,
|
417
|
+
3,
|
418
|
+
1,
|
419
|
+
1,
|
420
|
+
2,
|
421
|
+
3,
|
422
|
+
1,
|
423
|
+
1,
|
424
|
+
4,
|
425
|
+
2,
|
426
|
+
3,
|
427
|
+
1,
|
428
|
+
4,
|
429
|
+
2,
|
430
|
+
3,
|
431
|
+
1,
|
432
|
+
1,
|
433
|
+
2,
|
434
|
+
3,
|
435
|
+
1,
|
436
|
+
1,
|
437
|
+
2,
|
438
|
+
3,
|
439
|
+
2,
|
440
|
+
3,
|
441
|
+
2,
|
442
|
+
3,
|
443
|
+
2,
|
444
|
+
3,
|
445
|
+
1,
|
446
|
+
4,
|
447
|
+
2,
|
448
|
+
3,
|
449
|
+
1,
|
450
|
+
1,
|
451
|
+
4,
|
452
|
+
2,
|
453
|
+
3,
|
454
|
+
1,
|
455
|
+
1,
|
456
|
+
2,
|
457
|
+
3,
|
458
|
+
1,
|
459
|
+
1,
|
460
|
+
2,
|
461
|
+
3,
|
462
|
+
4,
|
463
|
+
]
|
464
|
+
)
|
290
465
|
arr = np.zeros(expected.shape)
|
291
466
|
kernel[1, 1](arr)
|
292
467
|
np.testing.assert_almost_equal(arr, expected)
|