numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -14,14 +14,17 @@ class TestConstStringCodegen(unittest.TestCase):
|
|
14
14
|
|
15
15
|
targetctx = cuda_target.target_context
|
16
16
|
mod = targetctx.create_module("")
|
17
|
-
textstring =
|
17
|
+
textstring = "A Little Brown Fox"
|
18
18
|
gv0 = targetctx.insert_const_string(mod, textstring)
|
19
19
|
# Insert the same const string a second time - the first should be
|
20
20
|
# reused.
|
21
21
|
targetctx.insert_const_string(mod, textstring)
|
22
22
|
|
23
|
-
res = re.findall(
|
24
|
-
|
23
|
+
res = re.findall(
|
24
|
+
r"@\"__conststring__.*internal.*constant.*\["
|
25
|
+
r"19\s+x\s+i8\]",
|
26
|
+
str(mod),
|
27
|
+
)
|
25
28
|
# Ensure that the const string was only inserted once
|
26
29
|
self.assertEqual(len(res), 1)
|
27
30
|
|
@@ -30,12 +33,16 @@ class TestConstStringCodegen(unittest.TestCase):
|
|
30
33
|
# Using insert_const_string
|
31
34
|
fn = ir.Function(mod, fnty, "test_insert_const_string")
|
32
35
|
builder = ir.IRBuilder(fn.append_basic_block())
|
33
|
-
res = builder.addrspacecast(
|
34
|
-
|
36
|
+
res = builder.addrspacecast(
|
37
|
+
gv0, ir.PointerType(ir.IntType(8)), "generic"
|
38
|
+
)
|
35
39
|
builder.ret(res)
|
36
40
|
|
37
|
-
matches = re.findall(
|
38
|
-
|
41
|
+
matches = re.findall(
|
42
|
+
r"@\"__conststring__.*internal.*constant.*\["
|
43
|
+
r"19\s+x\s+i8\]",
|
44
|
+
str(mod),
|
45
|
+
)
|
39
46
|
self.assertEqual(len(matches), 1)
|
40
47
|
|
41
48
|
# Using insert_string_const_addrspace
|
@@ -44,11 +51,14 @@ class TestConstStringCodegen(unittest.TestCase):
|
|
44
51
|
res = targetctx.insert_string_const_addrspace(builder, textstring)
|
45
52
|
builder.ret(res)
|
46
53
|
|
47
|
-
matches = re.findall(
|
48
|
-
|
54
|
+
matches = re.findall(
|
55
|
+
r"@\"__conststring__.*internal.*constant.*\["
|
56
|
+
r"19\s+x\s+i8\]",
|
57
|
+
str(mod),
|
58
|
+
)
|
49
59
|
self.assertEqual(len(matches), 1)
|
50
60
|
|
51
|
-
ptx = compile_ir(str(mod)).decode(
|
61
|
+
ptx = compile_ir(str(mod)).decode("ascii")
|
52
62
|
matches = list(re.findall(r"\.const.*__conststring__", ptx))
|
53
63
|
|
54
64
|
self.assertEqual(len(matches), 1)
|
@@ -70,8 +80,8 @@ class TestConstString(CUDATestCase):
|
|
70
80
|
# Expected result, e.g.:
|
71
81
|
# ['XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' '']
|
72
82
|
expected = np.zeros_like(arr)
|
73
|
-
expected[:-1] =
|
74
|
-
expected[-1] =
|
83
|
+
expected[:-1] = "XYZ"
|
84
|
+
expected[-1] = ""
|
75
85
|
np.testing.assert_equal(arr, expected)
|
76
86
|
|
77
87
|
def test_assign_const_byte_string(self):
|
@@ -88,42 +98,42 @@ class TestConstString(CUDATestCase):
|
|
88
98
|
# Expected result, e.g.:
|
89
99
|
# [b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'']
|
90
100
|
expected = np.zeros_like(arr)
|
91
|
-
expected[:-1] = b
|
92
|
-
expected[-1] = b
|
101
|
+
expected[:-1] = b"XYZ"
|
102
|
+
expected[-1] = b""
|
93
103
|
np.testing.assert_equal(arr, expected)
|
94
104
|
|
95
105
|
def test_assign_const_string_in_record(self):
|
96
106
|
@cuda.jit
|
97
107
|
def f(a):
|
98
|
-
a[0][
|
99
|
-
a[0][
|
100
|
-
a[1][
|
101
|
-
a[1][
|
108
|
+
a[0]["x"] = 1
|
109
|
+
a[0]["y"] = "ABC"
|
110
|
+
a[1]["x"] = 2
|
111
|
+
a[1]["y"] = "XYZ"
|
102
112
|
|
103
|
-
dt = np.dtype([(
|
113
|
+
dt = np.dtype([("x", np.int32), ("y", np.dtype("<U12"))])
|
104
114
|
a = np.zeros(2, dt)
|
105
115
|
|
106
116
|
f[1, 1](a)
|
107
117
|
|
108
|
-
reference = np.asarray([(1,
|
118
|
+
reference = np.asarray([(1, "ABC"), (2, "XYZ")], dtype=dt)
|
109
119
|
np.testing.assert_array_equal(reference, a)
|
110
120
|
|
111
121
|
def test_assign_const_bytes_in_record(self):
|
112
122
|
@cuda.jit
|
113
123
|
def f(a):
|
114
|
-
a[0][
|
115
|
-
a[0][
|
116
|
-
a[1][
|
117
|
-
a[1][
|
124
|
+
a[0]["x"] = 1
|
125
|
+
a[0]["y"] = b"ABC"
|
126
|
+
a[1]["x"] = 2
|
127
|
+
a[1]["y"] = b"XYZ"
|
118
128
|
|
119
|
-
dt = np.dtype([(
|
129
|
+
dt = np.dtype([("x", np.float32), ("y", np.dtype("S12"))])
|
120
130
|
a = np.zeros(2, dt)
|
121
131
|
|
122
132
|
f[1, 1](a)
|
123
133
|
|
124
|
-
reference = np.asarray([(1, b
|
134
|
+
reference = np.asarray([(1, b"ABC"), (2, b"XYZ")], dtype=dt)
|
125
135
|
np.testing.assert_array_equal(reference, a)
|
126
136
|
|
127
137
|
|
128
|
-
if __name__ ==
|
138
|
+
if __name__ == "__main__":
|
129
139
|
unittest.main()
|
@@ -5,30 +5,26 @@ from numba.cuda.testing import unittest, CUDATestCase
|
|
5
5
|
from numba.core.config import ENABLE_CUDASIM
|
6
6
|
|
7
7
|
CONST_EMPTY = np.array([])
|
8
|
-
CONST1D = np.arange(10, dtype=np.float64) / 2.
|
9
|
-
CONST2D = np.asfortranarray(
|
10
|
-
|
11
|
-
CONST3D = ((np.arange(5 * 5 * 5, dtype=np.complex64).reshape(5, 5, 5) + 1j) /
|
12
|
-
2j)
|
8
|
+
CONST1D = np.arange(10, dtype=np.float64) / 2.0
|
9
|
+
CONST2D = np.asfortranarray(np.arange(100, dtype=np.int32).reshape(10, 10))
|
10
|
+
CONST3D = (np.arange(5 * 5 * 5, dtype=np.complex64).reshape(5, 5, 5) + 1j) / 2j
|
13
11
|
CONST3BYTES = np.arange(3, dtype=np.uint8)
|
14
12
|
|
15
|
-
CONST_RECORD_EMPTY = np.array(
|
16
|
-
|
17
|
-
dtype=[('x', float), ('y', int)])
|
18
|
-
CONST_RECORD = np.array(
|
19
|
-
[(1.0, 2), (3.0, 4)],
|
20
|
-
dtype=[('x', float), ('y', int)])
|
13
|
+
CONST_RECORD_EMPTY = np.array([], dtype=[("x", float), ("y", int)])
|
14
|
+
CONST_RECORD = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", float), ("y", int)])
|
21
15
|
CONST_RECORD_ALIGN = np.array(
|
22
16
|
[(1, 2, 3, 0xDEADBEEF, 8), (4, 5, 6, 0xBEEFDEAD, 10)],
|
23
17
|
dtype=np.dtype(
|
24
18
|
dtype=[
|
25
|
-
(
|
26
|
-
(
|
27
|
-
(
|
28
|
-
(
|
29
|
-
(
|
19
|
+
("a", np.uint8),
|
20
|
+
("b", np.uint8),
|
21
|
+
("x", np.uint8),
|
22
|
+
("y", np.uint32),
|
23
|
+
("z", np.uint8),
|
30
24
|
],
|
31
|
-
align=True
|
25
|
+
align=True,
|
26
|
+
),
|
27
|
+
)
|
32
28
|
|
33
29
|
|
34
30
|
def cuconstEmpty(A):
|
@@ -68,18 +64,18 @@ def cuconstRecEmpty(A):
|
|
68
64
|
def cuconstRec(A, B):
|
69
65
|
C = cuda.const.array_like(CONST_RECORD)
|
70
66
|
i = cuda.grid(1)
|
71
|
-
A[i] = C[i][
|
72
|
-
B[i] = C[i][
|
67
|
+
A[i] = C[i]["x"]
|
68
|
+
B[i] = C[i]["y"]
|
73
69
|
|
74
70
|
|
75
71
|
def cuconstRecAlign(A, B, C, D, E):
|
76
72
|
Z = cuda.const.array_like(CONST_RECORD_ALIGN)
|
77
73
|
i = cuda.grid(1)
|
78
|
-
A[i] = Z[i][
|
79
|
-
B[i] = Z[i][
|
80
|
-
C[i] = Z[i][
|
81
|
-
D[i] = Z[i][
|
82
|
-
E[i] = Z[i][
|
74
|
+
A[i] = Z[i]["a"]
|
75
|
+
B[i] = Z[i]["b"]
|
76
|
+
C[i] = Z[i]["x"]
|
77
|
+
D[i] = Z[i]["y"]
|
78
|
+
E[i] = Z[i]["z"]
|
83
79
|
|
84
80
|
|
85
81
|
def cuconstAlign(z):
|
@@ -99,50 +95,52 @@ class TestCudaConstantMemory(CUDATestCase):
|
|
99
95
|
|
100
96
|
if not ENABLE_CUDASIM:
|
101
97
|
self.assertIn(
|
102
|
-
|
98
|
+
"ld.const.f64",
|
103
99
|
jcuconst.inspect_asm(sig),
|
104
|
-
"as we're adding to it, load as a double"
|
100
|
+
"as we're adding to it, load as a double",
|
101
|
+
)
|
105
102
|
|
106
103
|
def test_const_empty(self):
|
107
|
-
jcuconstEmpty = cuda.jit(
|
104
|
+
jcuconstEmpty = cuda.jit("void(int64[:])")(cuconstEmpty)
|
108
105
|
A = np.full(1, fill_value=-1, dtype=np.int64)
|
109
106
|
jcuconstEmpty[1, 1](A)
|
110
107
|
self.assertTrue(np.all(A == 0))
|
111
108
|
|
112
109
|
def test_const_align(self):
|
113
|
-
jcuconstAlign = cuda.jit(
|
110
|
+
jcuconstAlign = cuda.jit("void(float64[:])")(cuconstAlign)
|
114
111
|
A = np.full(3, fill_value=np.nan, dtype=float)
|
115
112
|
jcuconstAlign[1, 3](A)
|
116
113
|
self.assertTrue(np.all(A == (CONST3BYTES + CONST1D[:3])))
|
117
114
|
|
118
115
|
def test_const_array_2d(self):
|
119
|
-
sig = (int32[
|
116
|
+
sig = (int32[:, :],)
|
120
117
|
jcuconst2d = cuda.jit(sig)(cuconst2d)
|
121
|
-
A = np.zeros_like(CONST2D, order=
|
118
|
+
A = np.zeros_like(CONST2D, order="C")
|
122
119
|
jcuconst2d[(2, 2), (5, 5)](A)
|
123
120
|
self.assertTrue(np.all(A == CONST2D))
|
124
121
|
|
125
122
|
if not ENABLE_CUDASIM:
|
126
123
|
self.assertIn(
|
127
|
-
|
124
|
+
"ld.const.u32",
|
128
125
|
jcuconst2d.inspect_asm(sig),
|
129
|
-
"load the ints as ints"
|
126
|
+
"load the ints as ints",
|
127
|
+
)
|
130
128
|
|
131
129
|
def test_const_array_3d(self):
|
132
|
-
sig = (complex64[
|
130
|
+
sig = (complex64[:, :, :],)
|
133
131
|
jcuconst3d = cuda.jit(sig)(cuconst3d)
|
134
|
-
A = np.zeros_like(CONST3D, order=
|
132
|
+
A = np.zeros_like(CONST3D, order="F")
|
135
133
|
jcuconst3d[1, (5, 5, 5)](A)
|
136
134
|
self.assertTrue(np.all(A == CONST3D))
|
137
135
|
|
138
136
|
if not ENABLE_CUDASIM:
|
139
137
|
asm = jcuconst3d.inspect_asm(sig)
|
140
|
-
complex_load =
|
141
|
-
description =
|
138
|
+
complex_load = "ld.const.v2.f32"
|
139
|
+
description = "Load the complex as a vector of 2x f32"
|
142
140
|
self.assertIn(complex_load, asm, description)
|
143
141
|
|
144
142
|
def test_const_record_empty(self):
|
145
|
-
jcuconstRecEmpty = cuda.jit(
|
143
|
+
jcuconstRecEmpty = cuda.jit("void(int64[:])")(cuconstRecEmpty)
|
146
144
|
A = np.full(1, fill_value=-1, dtype=np.int64)
|
147
145
|
jcuconstRecEmpty[1, 1](A)
|
148
146
|
self.assertTrue(np.all(A == 0))
|
@@ -153,8 +151,8 @@ class TestCudaConstantMemory(CUDATestCase):
|
|
153
151
|
jcuconst = cuda.jit(cuconstRec).specialize(A, B)
|
154
152
|
|
155
153
|
jcuconst[2, 1](A, B)
|
156
|
-
np.testing.assert_allclose(A, CONST_RECORD[
|
157
|
-
np.testing.assert_allclose(B, CONST_RECORD[
|
154
|
+
np.testing.assert_allclose(A, CONST_RECORD["x"])
|
155
|
+
np.testing.assert_allclose(B, CONST_RECORD["y"])
|
158
156
|
|
159
157
|
def test_const_record_align(self):
|
160
158
|
A = np.zeros(2, dtype=np.float64)
|
@@ -165,12 +163,12 @@ class TestCudaConstantMemory(CUDATestCase):
|
|
165
163
|
jcuconst = cuda.jit(cuconstRecAlign).specialize(A, B, C, D, E)
|
166
164
|
|
167
165
|
jcuconst[2, 1](A, B, C, D, E)
|
168
|
-
np.testing.assert_allclose(A, CONST_RECORD_ALIGN[
|
169
|
-
np.testing.assert_allclose(B, CONST_RECORD_ALIGN[
|
170
|
-
np.testing.assert_allclose(C, CONST_RECORD_ALIGN[
|
171
|
-
np.testing.assert_allclose(D, CONST_RECORD_ALIGN[
|
172
|
-
np.testing.assert_allclose(E, CONST_RECORD_ALIGN[
|
166
|
+
np.testing.assert_allclose(A, CONST_RECORD_ALIGN["a"])
|
167
|
+
np.testing.assert_allclose(B, CONST_RECORD_ALIGN["b"])
|
168
|
+
np.testing.assert_allclose(C, CONST_RECORD_ALIGN["x"])
|
169
|
+
np.testing.assert_allclose(D, CONST_RECORD_ALIGN["y"])
|
170
|
+
np.testing.assert_allclose(E, CONST_RECORD_ALIGN["z"])
|
173
171
|
|
174
172
|
|
175
|
-
if __name__ ==
|
173
|
+
if __name__ == "__main__":
|
176
174
|
unittest.main()
|
@@ -3,9 +3,14 @@ from __future__ import print_function
|
|
3
3
|
import numpy as np
|
4
4
|
|
5
5
|
from numba import config, cuda, int32
|
6
|
-
from numba.cuda.testing import (
|
7
|
-
|
8
|
-
|
6
|
+
from numba.cuda.testing import (
|
7
|
+
unittest,
|
8
|
+
CUDATestCase,
|
9
|
+
skip_on_cudasim,
|
10
|
+
skip_unless_cc_60,
|
11
|
+
skip_if_cudadevrt_missing,
|
12
|
+
skip_if_mvc_enabled,
|
13
|
+
)
|
9
14
|
|
10
15
|
|
11
16
|
@cuda.jit
|
@@ -47,7 +52,7 @@ def sequential_rows(M):
|
|
47
52
|
|
48
53
|
|
49
54
|
@skip_if_cudadevrt_missing
|
50
|
-
@skip_if_mvc_enabled(
|
55
|
+
@skip_if_mvc_enabled("CG not supported with MVC")
|
51
56
|
class TestCudaCooperativeGroups(CUDATestCase):
|
52
57
|
@skip_unless_cc_60
|
53
58
|
def test_this_grid(self):
|
@@ -55,11 +60,12 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
55
60
|
this_grid[1, 1](A)
|
56
61
|
|
57
62
|
# Ensure the kernel executed beyond the call to cuda.this_grid()
|
58
|
-
self.assertFalse(np.isnan(A[0]),
|
63
|
+
self.assertFalse(np.isnan(A[0]), "Value was not set")
|
59
64
|
|
60
65
|
@skip_unless_cc_60
|
61
|
-
@skip_on_cudasim(
|
62
|
-
|
66
|
+
@skip_on_cudasim(
|
67
|
+
"Simulator doesn't differentiate between normal and cooperative kernels"
|
68
|
+
)
|
63
69
|
def test_this_grid_is_cooperative(self):
|
64
70
|
A = np.full(1, fill_value=np.nan)
|
65
71
|
this_grid[1, 1](A)
|
@@ -74,11 +80,12 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
74
80
|
sync_group[1, 1](A)
|
75
81
|
|
76
82
|
# Ensure the kernel executed beyond the call to cuda.sync_group()
|
77
|
-
self.assertFalse(np.isnan(A[0]),
|
83
|
+
self.assertFalse(np.isnan(A[0]), "Value was not set")
|
78
84
|
|
79
85
|
@skip_unless_cc_60
|
80
|
-
@skip_on_cudasim(
|
81
|
-
|
86
|
+
@skip_on_cudasim(
|
87
|
+
"Simulator doesn't differentiate between normal and cooperative kernels"
|
88
|
+
)
|
82
89
|
def test_sync_group_is_cooperative(self):
|
83
90
|
A = np.full(1, fill_value=np.nan)
|
84
91
|
sync_group[1, 1](A)
|
@@ -99,7 +106,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
99
106
|
for key, overload in no_sync.overloads.items():
|
100
107
|
self.assertFalse(overload.cooperative)
|
101
108
|
for link in overload._codelibrary._linking_files:
|
102
|
-
self.assertNotIn(
|
109
|
+
self.assertNotIn("cudadevrt", link)
|
103
110
|
|
104
111
|
@skip_unless_cc_60
|
105
112
|
def test_sync_at_matrix_row(self):
|
@@ -113,7 +120,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
113
120
|
blockdim = 32
|
114
121
|
griddim = A.shape[1] // blockdim
|
115
122
|
|
116
|
-
sig = (int32[
|
123
|
+
sig = (int32[:, ::1],)
|
117
124
|
c_sequential_rows = cuda.jit(sig)(sequential_rows)
|
118
125
|
|
119
126
|
overload = c_sequential_rows.overloads[sig]
|
@@ -133,7 +140,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
133
140
|
# doesn't error, and that varying the number of dimensions of the block
|
134
141
|
# whilst keeping the total number of threads constant doesn't change
|
135
142
|
# the maximum to validate some of the logic.
|
136
|
-
sig = (int32[
|
143
|
+
sig = (int32[:, ::1],)
|
137
144
|
c_sequential_rows = cuda.jit(sig)(sequential_rows)
|
138
145
|
overload = c_sequential_rows.overloads[sig]
|
139
146
|
blocks1d = overload.max_cooperative_grid_blocks(256)
|
@@ -143,5 +150,5 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
143
150
|
self.assertEqual(blocks1d, blocks3d)
|
144
151
|
|
145
152
|
|
146
|
-
if __name__ ==
|
153
|
+
if __name__ == "__main__":
|
147
154
|
unittest.main()
|