numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -6,20 +6,34 @@ import numpy as np
|
|
6
6
|
from numba.cuda.testing import unittest, CUDATestCase
|
7
7
|
from numba.core import types
|
8
8
|
from numba import cuda
|
9
|
-
from numba.tests.complex_usecases import (
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
9
|
+
from numba.tests.complex_usecases import (
|
10
|
+
real_usecase,
|
11
|
+
imag_usecase,
|
12
|
+
conjugate_usecase,
|
13
|
+
phase_usecase,
|
14
|
+
polar_as_complex_usecase,
|
15
|
+
rect_usecase,
|
16
|
+
isnan_usecase,
|
17
|
+
isinf_usecase,
|
18
|
+
isfinite_usecase,
|
19
|
+
exp_usecase,
|
20
|
+
log_usecase,
|
21
|
+
log_base_usecase,
|
22
|
+
log10_usecase,
|
23
|
+
sqrt_usecase,
|
24
|
+
asin_usecase,
|
25
|
+
acos_usecase,
|
26
|
+
atan_usecase,
|
27
|
+
cos_usecase,
|
28
|
+
sin_usecase,
|
29
|
+
tan_usecase,
|
30
|
+
acosh_usecase,
|
31
|
+
asinh_usecase,
|
32
|
+
atanh_usecase,
|
33
|
+
cosh_usecase,
|
34
|
+
sinh_usecase,
|
35
|
+
tanh_usecase,
|
36
|
+
)
|
23
37
|
from numba.np import numpy_support
|
24
38
|
|
25
39
|
|
@@ -29,15 +43,18 @@ def compile_scalar_func(pyfunc, argtypes, restype):
|
|
29
43
|
assert not isinstance(restype, types.Array)
|
30
44
|
device_func = cuda.jit(restype(*argtypes), device=True)(pyfunc)
|
31
45
|
|
32
|
-
kernel_types = [
|
33
|
-
|
46
|
+
kernel_types = [
|
47
|
+
types.Array(tp, 1, "C") for tp in [restype] + list(argtypes)
|
48
|
+
]
|
34
49
|
|
35
50
|
if len(argtypes) == 1:
|
51
|
+
|
36
52
|
def kernel_func(out, a):
|
37
53
|
i = cuda.grid(1)
|
38
54
|
if i < out.shape[0]:
|
39
55
|
out[i] = device_func(a[i])
|
40
56
|
elif len(argtypes) == 2:
|
57
|
+
|
41
58
|
def kernel_func(out, a, b):
|
42
59
|
i = cuda.grid(1)
|
43
60
|
if i < out.shape[0]:
|
@@ -49,8 +66,9 @@ def compile_scalar_func(pyfunc, argtypes, restype):
|
|
49
66
|
|
50
67
|
def kernel_wrapper(values):
|
51
68
|
n = len(values)
|
52
|
-
inputs = [
|
53
|
-
|
69
|
+
inputs = [
|
70
|
+
np.empty(n, dtype=numpy_support.as_dtype(tp)) for tp in argtypes
|
71
|
+
]
|
54
72
|
output = np.empty(n, dtype=numpy_support.as_dtype(restype))
|
55
73
|
for i, vs in enumerate(values):
|
56
74
|
for v, inp in zip(vs, inputs):
|
@@ -58,42 +76,70 @@ def compile_scalar_func(pyfunc, argtypes, restype):
|
|
58
76
|
args = [output] + inputs
|
59
77
|
kernel[int(math.ceil(n / 256)), 256](*args)
|
60
78
|
return list(output)
|
79
|
+
|
61
80
|
return kernel_wrapper
|
62
81
|
|
63
82
|
|
64
83
|
class BaseComplexTest(CUDATestCase):
|
65
|
-
|
66
84
|
def basic_values(self):
|
67
|
-
reals = [
|
68
|
-
|
85
|
+
reals = [
|
86
|
+
-0.0,
|
87
|
+
+0.0,
|
88
|
+
1,
|
89
|
+
-1,
|
90
|
+
+1.5,
|
91
|
+
-3.5,
|
92
|
+
float("-inf"),
|
93
|
+
float("+inf"),
|
94
|
+
float("nan"),
|
95
|
+
]
|
69
96
|
return [complex(x, y) for x, y in itertools.product(reals, reals)]
|
70
97
|
|
71
98
|
def more_values(self):
|
72
|
-
reals = [
|
73
|
-
|
99
|
+
reals = [
|
100
|
+
0.0,
|
101
|
+
+0.0,
|
102
|
+
1,
|
103
|
+
-1,
|
104
|
+
-math.pi,
|
105
|
+
+math.pi,
|
106
|
+
float("-inf"),
|
107
|
+
float("+inf"),
|
108
|
+
float("nan"),
|
109
|
+
]
|
74
110
|
return [complex(x, y) for x, y in itertools.product(reals, reals)]
|
75
111
|
|
76
112
|
def non_nan_values(self):
|
77
|
-
reals = [
|
78
|
-
|
113
|
+
reals = [
|
114
|
+
-0.0,
|
115
|
+
+0.0,
|
116
|
+
1,
|
117
|
+
-1,
|
118
|
+
-math.pi,
|
119
|
+
+math.pi,
|
120
|
+
float("inf"),
|
121
|
+
float("-inf"),
|
122
|
+
]
|
79
123
|
return [complex(x, y) for x, y in itertools.product(reals, reals)]
|
80
124
|
|
81
125
|
def run_func(self, pyfunc, sigs, values, ulps=1, ignore_sign_on_zero=False):
|
82
126
|
for sig in sigs:
|
83
127
|
if isinstance(sig, types.Type):
|
84
|
-
sig = sig,
|
128
|
+
sig = (sig,)
|
85
129
|
if isinstance(sig, tuple):
|
86
130
|
# Assume return type is the type of first argument
|
87
131
|
sig = sig[0](*sig)
|
88
|
-
prec = (
|
89
|
-
|
90
|
-
|
132
|
+
prec = (
|
133
|
+
"single"
|
134
|
+
if sig.args[0] in (types.float32, types.complex64)
|
135
|
+
else "double"
|
136
|
+
)
|
91
137
|
cudafunc = compile_scalar_func(pyfunc, sig.args, sig.return_type)
|
92
138
|
ok_values = []
|
93
139
|
expected_list = []
|
94
140
|
for args in values:
|
95
141
|
if not isinstance(args, (list, tuple)):
|
96
|
-
args = args,
|
142
|
+
args = (args,)
|
97
143
|
try:
|
98
144
|
expected_list.append(pyfunc(*args))
|
99
145
|
ok_values.append(args)
|
@@ -102,24 +148,31 @@ class BaseComplexTest(CUDATestCase):
|
|
102
148
|
continue
|
103
149
|
got_list = cudafunc(ok_values)
|
104
150
|
for got, expected, args in zip(got_list, expected_list, ok_values):
|
105
|
-
msg = 'for input %r with prec %r' % (args, prec)
|
106
|
-
self.assertPreciseEqual(
|
107
|
-
|
108
|
-
|
109
|
-
|
151
|
+
msg = "for input %r with prec %r" % (args, prec)
|
152
|
+
self.assertPreciseEqual(
|
153
|
+
got,
|
154
|
+
expected,
|
155
|
+
prec=prec,
|
156
|
+
ulps=ulps,
|
157
|
+
ignore_sign_on_zero=ignore_sign_on_zero,
|
158
|
+
msg=msg,
|
159
|
+
)
|
110
160
|
|
111
161
|
run_unary = run_func
|
112
162
|
run_binary = run_func
|
113
163
|
|
114
164
|
|
115
165
|
class TestComplex(BaseComplexTest):
|
116
|
-
|
117
166
|
def check_real_image(self, pyfunc):
|
118
167
|
values = self.basic_values()
|
119
|
-
self.run_unary(
|
120
|
-
|
121
|
-
|
122
|
-
|
168
|
+
self.run_unary(
|
169
|
+
pyfunc,
|
170
|
+
[
|
171
|
+
tp.underlying_float(tp)
|
172
|
+
for tp in (types.complex64, types.complex128)
|
173
|
+
],
|
174
|
+
values,
|
175
|
+
)
|
123
176
|
|
124
177
|
def test_real(self):
|
125
178
|
self.check_real_image(real_usecase)
|
@@ -130,9 +183,7 @@ class TestComplex(BaseComplexTest):
|
|
130
183
|
def test_conjugate(self):
|
131
184
|
pyfunc = conjugate_usecase
|
132
185
|
values = self.basic_values()
|
133
|
-
self.run_unary(pyfunc,
|
134
|
-
[types.complex64, types.complex128],
|
135
|
-
values)
|
186
|
+
self.run_unary(pyfunc, [types.complex64, types.complex128], values)
|
136
187
|
|
137
188
|
|
138
189
|
class TestCMath(BaseComplexTest):
|
@@ -141,26 +192,44 @@ class TestCMath(BaseComplexTest):
|
|
141
192
|
"""
|
142
193
|
|
143
194
|
def check_predicate_func(self, pyfunc):
|
144
|
-
self.run_unary(
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
195
|
+
self.run_unary(
|
196
|
+
pyfunc,
|
197
|
+
[types.boolean(tp) for tp in (types.complex128, types.complex64)],
|
198
|
+
self.basic_values(),
|
199
|
+
)
|
200
|
+
|
201
|
+
def check_unary_func(
|
202
|
+
self,
|
203
|
+
pyfunc,
|
204
|
+
ulps=1,
|
205
|
+
values=None,
|
206
|
+
returns_float=False,
|
207
|
+
ignore_sign_on_zero=False,
|
208
|
+
):
|
151
209
|
if returns_float:
|
210
|
+
|
152
211
|
def sig(tp):
|
153
212
|
return tp.underlying_float(tp)
|
154
213
|
else:
|
214
|
+
|
155
215
|
def sig(tp):
|
156
216
|
return tp(tp)
|
157
|
-
|
158
|
-
|
159
|
-
|
217
|
+
|
218
|
+
self.run_unary(
|
219
|
+
pyfunc,
|
220
|
+
[sig(types.complex128)],
|
221
|
+
values or self.more_values(),
|
222
|
+
ulps=ulps,
|
223
|
+
ignore_sign_on_zero=ignore_sign_on_zero,
|
224
|
+
)
|
160
225
|
# Avoid discontinuities around pi when in single precision.
|
161
|
-
self.run_unary(
|
162
|
-
|
163
|
-
|
226
|
+
self.run_unary(
|
227
|
+
pyfunc,
|
228
|
+
[sig(types.complex64)],
|
229
|
+
values or self.basic_values(),
|
230
|
+
ulps=ulps,
|
231
|
+
ignore_sign_on_zero=ignore_sign_on_zero,
|
232
|
+
)
|
164
233
|
|
165
234
|
# Conversions
|
166
235
|
|
@@ -172,11 +241,14 @@ class TestCMath(BaseComplexTest):
|
|
172
241
|
|
173
242
|
def test_rect(self):
|
174
243
|
def do_test(tp, seed_values):
|
175
|
-
values = [
|
176
|
-
|
244
|
+
values = [
|
245
|
+
(z.real, z.imag)
|
246
|
+
for z in seed_values
|
247
|
+
if not math.isinf(z.imag) or z.real == 0
|
248
|
+
]
|
177
249
|
float_type = tp.underlying_float
|
178
|
-
self.run_binary(rect_usecase, [tp(float_type, float_type)],
|
179
|
-
|
250
|
+
self.run_binary(rect_usecase, [tp(float_type, float_type)], values)
|
251
|
+
|
180
252
|
do_test(types.complex128, self.more_values())
|
181
253
|
# Avoid discontinuities around pi when in single precision.
|
182
254
|
do_test(types.complex64, self.basic_values())
|
@@ -202,10 +274,11 @@ class TestCMath(BaseComplexTest):
|
|
202
274
|
|
203
275
|
def test_log_base(self):
|
204
276
|
values = list(itertools.product(self.more_values(), self.more_values()))
|
205
|
-
value_types = [
|
206
|
-
|
207
|
-
|
208
|
-
|
277
|
+
value_types = [
|
278
|
+
(types.complex128, types.complex128),
|
279
|
+
(types.complex64, types.complex64),
|
280
|
+
]
|
281
|
+
self.run_binary(log_base_usecase, value_types, values, ulps=3)
|
209
282
|
|
210
283
|
def test_log10(self):
|
211
284
|
self.check_unary_func(log10_usecase)
|
@@ -222,8 +295,9 @@ class TestCMath(BaseComplexTest):
|
|
222
295
|
self.check_unary_func(asin_usecase, ulps=2)
|
223
296
|
|
224
297
|
def test_atan(self):
|
225
|
-
self.check_unary_func(
|
226
|
-
|
298
|
+
self.check_unary_func(
|
299
|
+
atan_usecase, ulps=2, values=self.non_nan_values()
|
300
|
+
)
|
227
301
|
|
228
302
|
def test_cos(self):
|
229
303
|
self.check_unary_func(cos_usecase, ulps=2)
|
@@ -233,8 +307,7 @@ class TestCMath(BaseComplexTest):
|
|
233
307
|
self.check_unary_func(sin_usecase, ulps=2)
|
234
308
|
|
235
309
|
def test_tan(self):
|
236
|
-
self.check_unary_func(tan_usecase, ulps=2,
|
237
|
-
ignore_sign_on_zero=True)
|
310
|
+
self.check_unary_func(tan_usecase, ulps=2, ignore_sign_on_zero=True)
|
238
311
|
|
239
312
|
# Hyperbolic functions
|
240
313
|
|
@@ -245,8 +318,7 @@ class TestCMath(BaseComplexTest):
|
|
245
318
|
self.check_unary_func(asinh_usecase, ulps=2)
|
246
319
|
|
247
320
|
def test_atanh(self):
|
248
|
-
self.check_unary_func(atanh_usecase, ulps=2,
|
249
|
-
ignore_sign_on_zero=True)
|
321
|
+
self.check_unary_func(atanh_usecase, ulps=2, ignore_sign_on_zero=True)
|
250
322
|
|
251
323
|
def test_cosh(self):
|
252
324
|
self.check_unary_func(cosh_usecase, ulps=2)
|
@@ -255,8 +327,7 @@ class TestCMath(BaseComplexTest):
|
|
255
327
|
self.check_unary_func(sinh_usecase, ulps=2)
|
256
328
|
|
257
329
|
def test_tanh(self):
|
258
|
-
self.check_unary_func(tanh_usecase, ulps=2,
|
259
|
-
ignore_sign_on_zero=True)
|
330
|
+
self.check_unary_func(tanh_usecase, ulps=2, ignore_sign_on_zero=True)
|
260
331
|
|
261
332
|
|
262
333
|
class TestAtomicOnComplexComponents(CUDATestCase):
|
@@ -292,5 +363,5 @@ class TestAtomicOnComplexComponents(CUDATestCase):
|
|
292
363
|
np.testing.assert_equal(arr1 + 1j, arr2)
|
293
364
|
|
294
365
|
|
295
|
-
if __name__ == '__main__':
|
366
|
+
if __name__ == "__main__":
|
296
367
|
unittest.main()
|
@@ -5,7 +5,7 @@ from numba.cuda.testing import unittest, CUDATestCase
|
|
5
5
|
|
6
6
|
class TestCudaComplex(CUDATestCase):
|
7
7
|
def test_cuda_complex_arg(self):
|
8
|
-
@cuda.jit('void(complex128[:], complex128)')
|
8
|
+
@cuda.jit("void(complex128[:], complex128)")
|
9
9
|
def foo(a, b):
|
10
10
|
i = cuda.grid(1)
|
11
11
|
a[i] += b
|
@@ -16,5 +16,5 @@ class TestCudaComplex(CUDATestCase):
|
|
16
16
|
self.assertTrue(np.allclose(a, a0 + 2j))
|
17
17
|
|
18
18
|
|
19
|
-
if __name__ == '__main__':
|
19
|
+
if __name__ == "__main__":
|
20
20
|
unittest.main()
|
@@ -14,14 +14,17 @@ class TestConstStringCodegen(unittest.TestCase):
|
|
14
14
|
|
15
15
|
targetctx = cuda_target.target_context
|
16
16
|
mod = targetctx.create_module("")
|
17
|
-
textstring = 'A Little Brown Fox'
|
17
|
+
textstring = "A Little Brown Fox"
|
18
18
|
gv0 = targetctx.insert_const_string(mod, textstring)
|
19
19
|
# Insert the same const string a second time - the first should be
|
20
20
|
# reused.
|
21
21
|
targetctx.insert_const_string(mod, textstring)
|
22
22
|
|
23
|
-
res = re.findall(
|
24
|
-
|
23
|
+
res = re.findall(
|
24
|
+
r"@\"__conststring__.*internal.*constant.*\["
|
25
|
+
r"19\s+x\s+i8\]",
|
26
|
+
str(mod),
|
27
|
+
)
|
25
28
|
# Ensure that the const string was only inserted once
|
26
29
|
self.assertEqual(len(res), 1)
|
27
30
|
|
@@ -30,12 +33,16 @@ class TestConstStringCodegen(unittest.TestCase):
|
|
30
33
|
# Using insert_const_string
|
31
34
|
fn = ir.Function(mod, fnty, "test_insert_const_string")
|
32
35
|
builder = ir.IRBuilder(fn.append_basic_block())
|
33
|
-
res = builder.addrspacecast(
|
34
|
-
|
36
|
+
res = builder.addrspacecast(
|
37
|
+
gv0, ir.PointerType(ir.IntType(8)), "generic"
|
38
|
+
)
|
35
39
|
builder.ret(res)
|
36
40
|
|
37
|
-
matches = re.findall(
|
38
|
-
|
41
|
+
matches = re.findall(
|
42
|
+
r"@\"__conststring__.*internal.*constant.*\["
|
43
|
+
r"19\s+x\s+i8\]",
|
44
|
+
str(mod),
|
45
|
+
)
|
39
46
|
self.assertEqual(len(matches), 1)
|
40
47
|
|
41
48
|
# Using insert_string_const_addrspace
|
@@ -44,11 +51,14 @@ class TestConstStringCodegen(unittest.TestCase):
|
|
44
51
|
res = targetctx.insert_string_const_addrspace(builder, textstring)
|
45
52
|
builder.ret(res)
|
46
53
|
|
47
|
-
matches = re.findall(
|
48
|
-
|
54
|
+
matches = re.findall(
|
55
|
+
r"@\"__conststring__.*internal.*constant.*\["
|
56
|
+
r"19\s+x\s+i8\]",
|
57
|
+
str(mod),
|
58
|
+
)
|
49
59
|
self.assertEqual(len(matches), 1)
|
50
60
|
|
51
|
-
ptx = compile_ir(str(mod)).decode('ascii')
|
61
|
+
ptx = compile_ir(str(mod)).decode("ascii")
|
52
62
|
matches = list(re.findall(r"\.const.*__conststring__", ptx))
|
53
63
|
|
54
64
|
self.assertEqual(len(matches), 1)
|
@@ -70,8 +80,8 @@ class TestConstString(CUDATestCase):
|
|
70
80
|
# Expected result, e.g.:
|
71
81
|
# ['XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' 'XYZ' '']
|
72
82
|
expected = np.zeros_like(arr)
|
73
|
-
expected[:-1] = 'XYZ'
|
74
|
-
expected[-1] = ''
|
83
|
+
expected[:-1] = "XYZ"
|
84
|
+
expected[-1] = ""
|
75
85
|
np.testing.assert_equal(arr, expected)
|
76
86
|
|
77
87
|
def test_assign_const_byte_string(self):
|
@@ -88,42 +98,42 @@ class TestConstString(CUDATestCase):
|
|
88
98
|
# Expected result, e.g.:
|
89
99
|
# [b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'XYZ' b'']
|
90
100
|
expected = np.zeros_like(arr)
|
91
|
-
expected[:-1] = b'XYZ'
|
92
|
-
expected[-1] = b''
|
101
|
+
expected[:-1] = b"XYZ"
|
102
|
+
expected[-1] = b""
|
93
103
|
np.testing.assert_equal(arr, expected)
|
94
104
|
|
95
105
|
def test_assign_const_string_in_record(self):
|
96
106
|
@cuda.jit
|
97
107
|
def f(a):
|
98
|
-
a[0][
|
99
|
-
a[0][
|
100
|
-
a[1][
|
101
|
-
a[1][
|
108
|
+
a[0]["x"] = 1
|
109
|
+
a[0]["y"] = "ABC"
|
110
|
+
a[1]["x"] = 2
|
111
|
+
a[1]["y"] = "XYZ"
|
102
112
|
|
103
|
-
dt = np.dtype([(
|
113
|
+
dt = np.dtype([("x", np.int32), ("y", np.dtype("<U12"))])
|
104
114
|
a = np.zeros(2, dt)
|
105
115
|
|
106
116
|
f[1, 1](a)
|
107
117
|
|
108
|
-
reference = np.asarray([(1,
|
118
|
+
reference = np.asarray([(1, "ABC"), (2, "XYZ")], dtype=dt)
|
109
119
|
np.testing.assert_array_equal(reference, a)
|
110
120
|
|
111
121
|
def test_assign_const_bytes_in_record(self):
|
112
122
|
@cuda.jit
|
113
123
|
def f(a):
|
114
|
-
a[0][
|
115
|
-
a[0][
|
116
|
-
a[1][
|
117
|
-
a[1][
|
124
|
+
a[0]["x"] = 1
|
125
|
+
a[0]["y"] = b"ABC"
|
126
|
+
a[1]["x"] = 2
|
127
|
+
a[1]["y"] = b"XYZ"
|
118
128
|
|
119
|
-
dt = np.dtype([(
|
129
|
+
dt = np.dtype([("x", np.float32), ("y", np.dtype("S12"))])
|
120
130
|
a = np.zeros(2, dt)
|
121
131
|
|
122
132
|
f[1, 1](a)
|
123
133
|
|
124
|
-
reference = np.asarray([(1, b
|
134
|
+
reference = np.asarray([(1, b"ABC"), (2, b"XYZ")], dtype=dt)
|
125
135
|
np.testing.assert_array_equal(reference, a)
|
126
136
|
|
127
137
|
|
128
|
-
if __name__ == '__main__':
|
138
|
+
if __name__ == "__main__":
|
129
139
|
unittest.main()
|
@@ -5,30 +5,26 @@ from numba.cuda.testing import unittest, CUDATestCase
|
|
5
5
|
from numba.core.config import ENABLE_CUDASIM
|
6
6
|
|
7
7
|
CONST_EMPTY = np.array([])
|
8
|
-
CONST1D = np.arange(10, dtype=np.float64) / 2.
|
9
|
-
CONST2D = np.asfortranarray(
|
10
|
-
|
11
|
-
CONST3D = ((np.arange(5 * 5 * 5, dtype=np.complex64).reshape(5, 5, 5) + 1j) /
|
12
|
-
2j)
|
8
|
+
CONST1D = np.arange(10, dtype=np.float64) / 2.0
|
9
|
+
CONST2D = np.asfortranarray(np.arange(100, dtype=np.int32).reshape(10, 10))
|
10
|
+
CONST3D = (np.arange(5 * 5 * 5, dtype=np.complex64).reshape(5, 5, 5) + 1j) / 2j
|
13
11
|
CONST3BYTES = np.arange(3, dtype=np.uint8)
|
14
12
|
|
15
|
-
CONST_RECORD_EMPTY = np.array(
|
16
|
-
|
17
|
-
dtype=[('x', float), ('y', int)])
|
18
|
-
CONST_RECORD = np.array(
|
19
|
-
[(1.0, 2), (3.0, 4)],
|
20
|
-
dtype=[('x', float), ('y', int)])
|
13
|
+
CONST_RECORD_EMPTY = np.array([], dtype=[("x", float), ("y", int)])
|
14
|
+
CONST_RECORD = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", float), ("y", int)])
|
21
15
|
CONST_RECORD_ALIGN = np.array(
|
22
16
|
[(1, 2, 3, 0xDEADBEEF, 8), (4, 5, 6, 0xBEEFDEAD, 10)],
|
23
17
|
dtype=np.dtype(
|
24
18
|
dtype=[
|
25
|
-
(
|
26
|
-
(
|
27
|
-
(
|
28
|
-
(
|
29
|
-
(
|
19
|
+
("a", np.uint8),
|
20
|
+
("b", np.uint8),
|
21
|
+
("x", np.uint8),
|
22
|
+
("y", np.uint32),
|
23
|
+
("z", np.uint8),
|
30
24
|
],
|
31
|
-
align=True
|
25
|
+
align=True,
|
26
|
+
),
|
27
|
+
)
|
32
28
|
|
33
29
|
|
34
30
|
def cuconstEmpty(A):
|
@@ -68,18 +64,18 @@ def cuconstRecEmpty(A):
|
|
68
64
|
def cuconstRec(A, B):
|
69
65
|
C = cuda.const.array_like(CONST_RECORD)
|
70
66
|
i = cuda.grid(1)
|
71
|
-
A[i] = C[i][
|
72
|
-
B[i] = C[i][
|
67
|
+
A[i] = C[i]["x"]
|
68
|
+
B[i] = C[i]["y"]
|
73
69
|
|
74
70
|
|
75
71
|
def cuconstRecAlign(A, B, C, D, E):
|
76
72
|
Z = cuda.const.array_like(CONST_RECORD_ALIGN)
|
77
73
|
i = cuda.grid(1)
|
78
|
-
A[i] = Z[i][
|
79
|
-
B[i] = Z[i][
|
80
|
-
C[i] = Z[i][
|
81
|
-
D[i] = Z[i][
|
82
|
-
E[i] = Z[i][
|
74
|
+
A[i] = Z[i]["a"]
|
75
|
+
B[i] = Z[i]["b"]
|
76
|
+
C[i] = Z[i]["x"]
|
77
|
+
D[i] = Z[i]["y"]
|
78
|
+
E[i] = Z[i]["z"]
|
83
79
|
|
84
80
|
|
85
81
|
def cuconstAlign(z):
|
@@ -99,50 +95,52 @@ class TestCudaConstantMemory(CUDATestCase):
|
|
99
95
|
|
100
96
|
if not ENABLE_CUDASIM:
|
101
97
|
self.assertIn(
|
102
|
-
|
98
|
+
"ld.const.f64",
|
103
99
|
jcuconst.inspect_asm(sig),
|
104
|
-
"as we're adding to it, load as a double"
|
100
|
+
"as we're adding to it, load as a double",
|
101
|
+
)
|
105
102
|
|
106
103
|
def test_const_empty(self):
|
107
|
-
jcuconstEmpty = cuda.jit(
|
104
|
+
jcuconstEmpty = cuda.jit("void(int64[:])")(cuconstEmpty)
|
108
105
|
A = np.full(1, fill_value=-1, dtype=np.int64)
|
109
106
|
jcuconstEmpty[1, 1](A)
|
110
107
|
self.assertTrue(np.all(A == 0))
|
111
108
|
|
112
109
|
def test_const_align(self):
|
113
|
-
jcuconstAlign = cuda.jit(
|
110
|
+
jcuconstAlign = cuda.jit("void(float64[:])")(cuconstAlign)
|
114
111
|
A = np.full(3, fill_value=np.nan, dtype=float)
|
115
112
|
jcuconstAlign[1, 3](A)
|
116
113
|
self.assertTrue(np.all(A == (CONST3BYTES + CONST1D[:3])))
|
117
114
|
|
118
115
|
def test_const_array_2d(self):
|
119
|
-
sig = (int32[
|
116
|
+
sig = (int32[:, :],)
|
120
117
|
jcuconst2d = cuda.jit(sig)(cuconst2d)
|
121
|
-
A = np.zeros_like(CONST2D, order=
|
118
|
+
A = np.zeros_like(CONST2D, order="C")
|
122
119
|
jcuconst2d[(2, 2), (5, 5)](A)
|
123
120
|
self.assertTrue(np.all(A == CONST2D))
|
124
121
|
|
125
122
|
if not ENABLE_CUDASIM:
|
126
123
|
self.assertIn(
|
127
|
-
|
124
|
+
"ld.const.u32",
|
128
125
|
jcuconst2d.inspect_asm(sig),
|
129
|
-
"load the ints as ints"
|
126
|
+
"load the ints as ints",
|
127
|
+
)
|
130
128
|
|
131
129
|
def test_const_array_3d(self):
|
132
|
-
sig = (complex64[
|
130
|
+
sig = (complex64[:, :, :],)
|
133
131
|
jcuconst3d = cuda.jit(sig)(cuconst3d)
|
134
|
-
A = np.zeros_like(CONST3D, order=
|
132
|
+
A = np.zeros_like(CONST3D, order="F")
|
135
133
|
jcuconst3d[1, (5, 5, 5)](A)
|
136
134
|
self.assertTrue(np.all(A == CONST3D))
|
137
135
|
|
138
136
|
if not ENABLE_CUDASIM:
|
139
137
|
asm = jcuconst3d.inspect_asm(sig)
|
140
|
-
complex_load = 'ld.const.v2.f32'
|
141
|
-
description = 'Load the complex as a vector of 2x f32'
|
138
|
+
complex_load = "ld.const.v2.f32"
|
139
|
+
description = "Load the complex as a vector of 2x f32"
|
142
140
|
self.assertIn(complex_load, asm, description)
|
143
141
|
|
144
142
|
def test_const_record_empty(self):
|
145
|
-
jcuconstRecEmpty = cuda.jit(
|
143
|
+
jcuconstRecEmpty = cuda.jit("void(int64[:])")(cuconstRecEmpty)
|
146
144
|
A = np.full(1, fill_value=-1, dtype=np.int64)
|
147
145
|
jcuconstRecEmpty[1, 1](A)
|
148
146
|
self.assertTrue(np.all(A == 0))
|
@@ -153,8 +151,8 @@ class TestCudaConstantMemory(CUDATestCase):
|
|
153
151
|
jcuconst = cuda.jit(cuconstRec).specialize(A, B)
|
154
152
|
|
155
153
|
jcuconst[2, 1](A, B)
|
156
|
-
np.testing.assert_allclose(A, CONST_RECORD[
|
157
|
-
np.testing.assert_allclose(B, CONST_RECORD[
|
154
|
+
np.testing.assert_allclose(A, CONST_RECORD["x"])
|
155
|
+
np.testing.assert_allclose(B, CONST_RECORD["y"])
|
158
156
|
|
159
157
|
def test_const_record_align(self):
|
160
158
|
A = np.zeros(2, dtype=np.float64)
|
@@ -165,12 +163,12 @@ class TestCudaConstantMemory(CUDATestCase):
|
|
165
163
|
jcuconst = cuda.jit(cuconstRecAlign).specialize(A, B, C, D, E)
|
166
164
|
|
167
165
|
jcuconst[2, 1](A, B, C, D, E)
|
168
|
-
np.testing.assert_allclose(A, CONST_RECORD_ALIGN[
|
169
|
-
np.testing.assert_allclose(B, CONST_RECORD_ALIGN[
|
170
|
-
np.testing.assert_allclose(C, CONST_RECORD_ALIGN[
|
171
|
-
np.testing.assert_allclose(D, CONST_RECORD_ALIGN[
|
172
|
-
np.testing.assert_allclose(E, CONST_RECORD_ALIGN[
|
166
|
+
np.testing.assert_allclose(A, CONST_RECORD_ALIGN["a"])
|
167
|
+
np.testing.assert_allclose(B, CONST_RECORD_ALIGN["b"])
|
168
|
+
np.testing.assert_allclose(C, CONST_RECORD_ALIGN["x"])
|
169
|
+
np.testing.assert_allclose(D, CONST_RECORD_ALIGN["y"])
|
170
|
+
np.testing.assert_allclose(E, CONST_RECORD_ALIGN["z"])
|
173
171
|
|
174
172
|
|
175
|
-
if __name__ == '__main__':
|
173
|
+
if __name__ == "__main__":
|
176
174
|
unittest.main()
|