numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@ import numpy as np
|
|
8
8
|
|
9
9
|
# Dummy function definitions to overload
|
10
10
|
|
11
|
+
|
11
12
|
def generic_func_1():
|
12
13
|
pass
|
13
14
|
|
@@ -83,109 +84,124 @@ CUDA_TARGET_OL_CALLS_TARGET_OL = 43
|
|
83
84
|
|
84
85
|
# Overload implementations
|
85
86
|
|
86
|
-
|
87
|
+
|
88
|
+
@overload(generic_func_1, target="generic")
|
87
89
|
def ol_generic_func_1(x):
|
88
90
|
def impl(x):
|
89
91
|
x[0] *= GENERIC_FUNCTION_1
|
92
|
+
|
90
93
|
return impl
|
91
94
|
|
92
95
|
|
93
|
-
@overload(cuda_func_1, target=
|
96
|
+
@overload(cuda_func_1, target="cuda")
|
94
97
|
def ol_cuda_func_1(x):
|
95
98
|
def impl(x):
|
96
99
|
x[0] *= CUDA_FUNCTION_1
|
100
|
+
|
97
101
|
return impl
|
98
102
|
|
99
103
|
|
100
|
-
@overload(generic_func_2, target=
|
104
|
+
@overload(generic_func_2, target="generic")
|
101
105
|
def ol_generic_func_2(x):
|
102
106
|
def impl(x):
|
103
107
|
x[0] *= GENERIC_FUNCTION_2
|
108
|
+
|
104
109
|
return impl
|
105
110
|
|
106
111
|
|
107
|
-
@overload(cuda_func_2, target=
|
112
|
+
@overload(cuda_func_2, target="cuda")
|
108
113
|
def ol_cuda_func(x):
|
109
114
|
def impl(x):
|
110
115
|
x[0] *= CUDA_FUNCTION_2
|
116
|
+
|
111
117
|
return impl
|
112
118
|
|
113
119
|
|
114
|
-
@overload(generic_calls_generic, target=
|
120
|
+
@overload(generic_calls_generic, target="generic")
|
115
121
|
def ol_generic_calls_generic(x):
|
116
122
|
def impl(x):
|
117
123
|
x[0] *= GENERIC_CALLS_GENERIC
|
118
124
|
generic_func_1(x)
|
125
|
+
|
119
126
|
return impl
|
120
127
|
|
121
128
|
|
122
|
-
@overload(generic_calls_cuda, target=
|
129
|
+
@overload(generic_calls_cuda, target="generic")
|
123
130
|
def ol_generic_calls_cuda(x):
|
124
131
|
def impl(x):
|
125
132
|
x[0] *= GENERIC_CALLS_CUDA
|
126
133
|
cuda_func_1(x)
|
134
|
+
|
127
135
|
return impl
|
128
136
|
|
129
137
|
|
130
|
-
@overload(cuda_calls_generic, target=
|
138
|
+
@overload(cuda_calls_generic, target="cuda")
|
131
139
|
def ol_cuda_calls_generic(x):
|
132
140
|
def impl(x):
|
133
141
|
x[0] *= CUDA_CALLS_GENERIC
|
134
142
|
generic_func_1(x)
|
143
|
+
|
135
144
|
return impl
|
136
145
|
|
137
146
|
|
138
|
-
@overload(cuda_calls_cuda, target=
|
147
|
+
@overload(cuda_calls_cuda, target="cuda")
|
139
148
|
def ol_cuda_calls_cuda(x):
|
140
149
|
def impl(x):
|
141
150
|
x[0] *= CUDA_CALLS_CUDA
|
142
151
|
cuda_func_1(x)
|
152
|
+
|
143
153
|
return impl
|
144
154
|
|
145
155
|
|
146
|
-
@overload(target_overloaded, target=
|
156
|
+
@overload(target_overloaded, target="generic")
|
147
157
|
def ol_target_overloaded_generic(x):
|
148
158
|
def impl(x):
|
149
159
|
x[0] *= GENERIC_TARGET_OL
|
160
|
+
|
150
161
|
return impl
|
151
162
|
|
152
163
|
|
153
|
-
@overload(target_overloaded, target=
|
164
|
+
@overload(target_overloaded, target="cuda")
|
154
165
|
def ol_target_overloaded_cuda(x):
|
155
166
|
def impl(x):
|
156
167
|
x[0] *= CUDA_TARGET_OL
|
168
|
+
|
157
169
|
return impl
|
158
170
|
|
159
171
|
|
160
|
-
@overload(generic_calls_target_overloaded, target=
|
172
|
+
@overload(generic_calls_target_overloaded, target="generic")
|
161
173
|
def ol_generic_calls_target_overloaded(x):
|
162
174
|
def impl(x):
|
163
175
|
x[0] *= GENERIC_CALLS_TARGET_OL
|
164
176
|
target_overloaded(x)
|
177
|
+
|
165
178
|
return impl
|
166
179
|
|
167
180
|
|
168
|
-
@overload(cuda_calls_target_overloaded, target=
|
181
|
+
@overload(cuda_calls_target_overloaded, target="cuda")
|
169
182
|
def ol_cuda_calls_target_overloaded(x):
|
170
183
|
def impl(x):
|
171
184
|
x[0] *= CUDA_CALLS_TARGET_OL
|
172
185
|
target_overloaded(x)
|
186
|
+
|
173
187
|
return impl
|
174
188
|
|
175
189
|
|
176
|
-
@overload(target_overloaded_calls_target_overloaded, target=
|
190
|
+
@overload(target_overloaded_calls_target_overloaded, target="generic")
|
177
191
|
def ol_generic_calls_target_overloaded_generic(x):
|
178
192
|
def impl(x):
|
179
193
|
x[0] *= GENERIC_TARGET_OL_CALLS_TARGET_OL
|
180
194
|
target_overloaded(x)
|
195
|
+
|
181
196
|
return impl
|
182
197
|
|
183
198
|
|
184
|
-
@overload(target_overloaded_calls_target_overloaded, target=
|
199
|
+
@overload(target_overloaded_calls_target_overloaded, target="cuda")
|
185
200
|
def ol_generic_calls_target_overloaded_cuda(x):
|
186
201
|
def impl(x):
|
187
202
|
x[0] *= CUDA_TARGET_OL_CALLS_TARGET_OL
|
188
203
|
target_overloaded(x)
|
204
|
+
|
189
205
|
return impl
|
190
206
|
|
191
207
|
|
@@ -193,10 +209,11 @@ def ol_generic_calls_target_overloaded_cuda(x):
|
|
193
209
|
def ol_default_values_and_kwargs(out, x, y=5, z=6):
|
194
210
|
def impl(out, x, y=5, z=6):
|
195
211
|
out[0], out[1] = x + y, z
|
212
|
+
|
196
213
|
return impl
|
197
214
|
|
198
215
|
|
199
|
-
@skip_on_cudasim(
|
216
|
+
@skip_on_cudasim("Overloading not supported in cudasim")
|
200
217
|
class TestOverload(CUDATestCase):
|
201
218
|
def check_overload(self, kernel, expected):
|
202
219
|
x = np.ones(1, dtype=np.int32)
|
@@ -311,7 +328,7 @@ class TestOverload(CUDATestCase):
|
|
311
328
|
MyDummy, MyDummyType = self.make_dummy_type()
|
312
329
|
mydummy_type = typeof(MyDummy())
|
313
330
|
|
314
|
-
@overload_attribute(MyDummyType,
|
331
|
+
@overload_attribute(MyDummyType, "cuda_only", target="cuda")
|
315
332
|
def ov_dummy_cuda_attr(obj):
|
316
333
|
def imp(obj):
|
317
334
|
return 42
|
@@ -330,6 +347,7 @@ class TestOverload(CUDATestCase):
|
|
330
347
|
msg = "Unknown attribute 'cuda_only'"
|
331
348
|
|
332
349
|
with self.assertRaisesRegex(TypingError, msg):
|
350
|
+
|
333
351
|
@njit(types.int64(mydummy_type))
|
334
352
|
def illegal_target_attr_use(x):
|
335
353
|
return x.cuda_only
|
@@ -345,14 +363,15 @@ class TestOverload(CUDATestCase):
|
|
345
363
|
"""
|
346
364
|
Test default values and kwargs.
|
347
365
|
"""
|
366
|
+
|
348
367
|
@cuda.jit()
|
349
368
|
def kernel(a, b, out):
|
350
369
|
default_values_and_kwargs(out, a, z=b)
|
351
370
|
|
352
371
|
out = np.empty(2, dtype=np.int64)
|
353
|
-
kernel[1,1](1, 2, out)
|
372
|
+
kernel[1, 1](1, 2, out)
|
354
373
|
self.assertEqual(tuple(out), (6, 2))
|
355
374
|
|
356
375
|
|
357
|
-
if __name__ ==
|
376
|
+
if __name__ == "__main__":
|
358
377
|
unittest.main()
|
@@ -47,7 +47,7 @@ def vec_pow_inplace_binop(r, x):
|
|
47
47
|
|
48
48
|
def random_complex(N):
|
49
49
|
np.random.seed(123)
|
50
|
-
return
|
50
|
+
return np.random.random(1) + np.random.random(1) * 1j
|
51
51
|
|
52
52
|
|
53
53
|
class TestCudaPowi(CUDATestCase):
|
@@ -59,7 +59,7 @@ class TestCudaPowi(CUDATestCase):
|
|
59
59
|
A = np.arange(10, dtype=np.float64).reshape(2, 5)
|
60
60
|
Aout = np.empty_like(A)
|
61
61
|
kernel[1, A.shape](A, power, Aout)
|
62
|
-
self.assertTrue(np.allclose(Aout, A
|
62
|
+
self.assertTrue(np.allclose(Aout, A**power))
|
63
63
|
|
64
64
|
def test_powi_binop(self):
|
65
65
|
dec = cuda.jit(void(float64[:, :], int8, float64[:, :]))
|
@@ -69,7 +69,7 @@ class TestCudaPowi(CUDATestCase):
|
|
69
69
|
A = np.arange(10, dtype=np.float64).reshape(2, 5)
|
70
70
|
Aout = np.empty_like(A)
|
71
71
|
kernel[1, A.shape](A, power, Aout)
|
72
|
-
self.assertTrue(np.allclose(Aout, A
|
72
|
+
self.assertTrue(np.allclose(Aout, A**power))
|
73
73
|
|
74
74
|
# Relative tolerance kwarg is provided because 1.0e-7 (the default for
|
75
75
|
# assert_allclose) is a bit tight for single precision.
|
@@ -81,7 +81,7 @@ class TestCudaPowi(CUDATestCase):
|
|
81
81
|
|
82
82
|
cfunc = cuda.jit(func)
|
83
83
|
cfunc[1, N](r, x, y)
|
84
|
-
np.testing.assert_allclose(r, x
|
84
|
+
np.testing.assert_allclose(r, x**y, rtol=rtol)
|
85
85
|
|
86
86
|
# Checks special cases
|
87
87
|
x = np.asarray([0.0j, 1.0j], dtype=dtype)
|
@@ -89,7 +89,7 @@ class TestCudaPowi(CUDATestCase):
|
|
89
89
|
r = np.zeros_like(x)
|
90
90
|
|
91
91
|
cfunc[1, 2](r, x, y)
|
92
|
-
np.testing.assert_allclose(r, x
|
92
|
+
np.testing.assert_allclose(r, x**y, rtol=rtol)
|
93
93
|
|
94
94
|
def test_cpow_complex64_pow(self):
|
95
95
|
self._test_cpow(np.complex64, vec_pow, rtol=3.0e-7)
|
@@ -107,7 +107,7 @@ class TestCudaPowi(CUDATestCase):
|
|
107
107
|
N = 32
|
108
108
|
x = random_complex(N).astype(dtype)
|
109
109
|
y = random_complex(N).astype(dtype)
|
110
|
-
r = x
|
110
|
+
r = x**y
|
111
111
|
|
112
112
|
cfunc = cuda.jit(vec_pow_inplace_binop)
|
113
113
|
cfunc[1, N](x, y)
|
@@ -120,5 +120,5 @@ class TestCudaPowi(CUDATestCase):
|
|
120
120
|
self._test_cpow_inplace_binop(np.complex128, rtol=3.0e-7)
|
121
121
|
|
122
122
|
|
123
|
-
if __name__ ==
|
123
|
+
if __name__ == "__main__":
|
124
124
|
unittest.main()
|
@@ -113,7 +113,7 @@ class TestPrint(CUDATestCase):
|
|
113
113
|
def test_cuhello(self):
|
114
114
|
output, _ = self.run_code(cuhello_usecase)
|
115
115
|
actual = [line.strip() for line in output.splitlines()]
|
116
|
-
expected = [
|
116
|
+
expected = ["-42"] * 6 + ["%d 999" % i for i in range(6)]
|
117
117
|
# The output of GPU threads is intermingled, but each print()
|
118
118
|
# call is still atomic
|
119
119
|
self.assertEqual(sorted(actual), expected)
|
@@ -136,7 +136,7 @@ class TestPrint(CUDATestCase):
|
|
136
136
|
def test_string(self):
|
137
137
|
output, _ = self.run_code(printstring_usecase)
|
138
138
|
lines = [line.strip() for line in output.splitlines(True)]
|
139
|
-
expected = [
|
139
|
+
expected = ["%d hop! 999" % i for i in range(3)]
|
140
140
|
self.assertEqual(sorted(lines), expected)
|
141
141
|
|
142
142
|
def test_dim3(self):
|
@@ -145,7 +145,7 @@ class TestPrint(CUDATestCase):
|
|
145
145
|
expected = [str(i) for i in np.ndindex(2, 2, 2)]
|
146
146
|
self.assertEqual(sorted(lines), expected)
|
147
147
|
|
148
|
-
@skip_on_cudasim(
|
148
|
+
@skip_on_cudasim("cudasim can print unlimited output")
|
149
149
|
def test_too_many_args(self):
|
150
150
|
# Tests that we emit the format string and warn when there are more
|
151
151
|
# than 32 arguments, in common with CUDA C/C++ printf - this is due to
|
@@ -155,14 +155,16 @@ class TestPrint(CUDATestCase):
|
|
155
155
|
output, errors = self.run_code(print_too_many_usecase)
|
156
156
|
|
157
157
|
# Check that the format string was printed instead of formatted garbage
|
158
|
-
expected_fmt_string =
|
158
|
+
expected_fmt_string = " ".join(["%lld" for _ in range(33)])
|
159
159
|
self.assertIn(expected_fmt_string, output)
|
160
160
|
|
161
161
|
# Check for the expected warning about formatting more than 32 items
|
162
|
-
warn_msg = (
|
163
|
-
|
162
|
+
warn_msg = (
|
163
|
+
"CUDA print() cannot print more than 32 items. The raw "
|
164
|
+
"format string will be emitted by the kernel instead."
|
165
|
+
)
|
164
166
|
self.assertIn(warn_msg, errors)
|
165
167
|
|
166
168
|
|
167
|
-
if __name__ ==
|
169
|
+
if __name__ == "__main__":
|
168
170
|
unittest.main()
|
@@ -6,9 +6,12 @@ from numba import cuda
|
|
6
6
|
from numba.cuda.testing import unittest
|
7
7
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
8
8
|
|
9
|
-
from numba.cuda.random import
|
10
|
-
xoroshiro128p_uniform_float32,
|
11
|
-
|
9
|
+
from numba.cuda.random import (
|
10
|
+
xoroshiro128p_uniform_float32,
|
11
|
+
xoroshiro128p_normal_float32,
|
12
|
+
xoroshiro128p_uniform_float64,
|
13
|
+
xoroshiro128p_normal_float64,
|
14
|
+
)
|
12
15
|
|
13
16
|
|
14
17
|
# Distributions
|
@@ -52,8 +55,9 @@ class TestCudaRandomXoroshiro128p(CUDATestCase):
|
|
52
55
|
states = cuda.random.create_xoroshiro128p_states(10, seed=1)
|
53
56
|
s1 = states.copy_to_host()
|
54
57
|
|
55
|
-
states = cuda.random.create_xoroshiro128p_states(
|
56
|
-
|
58
|
+
states = cuda.random.create_xoroshiro128p_states(
|
59
|
+
10, seed=1, subsequence_start=3
|
60
|
+
)
|
57
61
|
s2 = states.copy_to_host()
|
58
62
|
|
59
63
|
# Starting seeds should match up with offset of 3
|
@@ -61,8 +65,9 @@ class TestCudaRandomXoroshiro128p(CUDATestCase):
|
|
61
65
|
|
62
66
|
def test_create_stream(self):
|
63
67
|
stream = cuda.stream()
|
64
|
-
states = cuda.random.create_xoroshiro128p_states(
|
65
|
-
|
68
|
+
states = cuda.random.create_xoroshiro128p_states(
|
69
|
+
10, seed=1, stream=stream
|
70
|
+
)
|
66
71
|
s = states.copy_to_host()
|
67
72
|
self.assertEqual(len(np.unique(s)), 10)
|
68
73
|
|
@@ -79,7 +84,7 @@ class TestCudaRandomXoroshiro128p(CUDATestCase):
|
|
79
84
|
def test_uniform_float32(self):
|
80
85
|
self.check_uniform(rng_kernel_float32, np.float32)
|
81
86
|
|
82
|
-
@skip_on_cudasim(
|
87
|
+
@skip_on_cudasim("skip test for speed under cudasim")
|
83
88
|
def test_uniform_float64(self):
|
84
89
|
self.check_uniform(rng_kernel_float64, np.float64)
|
85
90
|
|
@@ -95,10 +100,10 @@ class TestCudaRandomXoroshiro128p(CUDATestCase):
|
|
95
100
|
def test_normal_float32(self):
|
96
101
|
self.check_normal(rng_kernel_float32, np.float32)
|
97
102
|
|
98
|
-
@skip_on_cudasim(
|
103
|
+
@skip_on_cudasim("skip test for speed under cudasim")
|
99
104
|
def test_normal_float64(self):
|
100
105
|
self.check_normal(rng_kernel_float64, np.float64)
|
101
106
|
|
102
107
|
|
103
|
-
if __name__ ==
|
108
|
+
if __name__ == "__main__":
|
104
109
|
unittest.main()
|