numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -15,19 +15,18 @@ def add_kernel(r, x, y):
|
|
15
15
|
r[0] = x + y
|
16
16
|
|
17
17
|
|
18
|
-
@skip_on_cudasim(
|
18
|
+
@skip_on_cudasim("Specialization not implemented in the simulator")
|
19
19
|
class TestDispatcherSpecialization(CUDATestCase):
|
20
20
|
def _test_no_double_specialize(self, dispatcher, ty):
|
21
|
-
|
22
21
|
with self.assertRaises(RuntimeError) as e:
|
23
22
|
dispatcher.specialize(ty)
|
24
23
|
|
25
|
-
self.assertIn(
|
24
|
+
self.assertIn("Dispatcher already specialized", str(e.exception))
|
26
25
|
|
27
26
|
def test_no_double_specialize_sig_same_types(self):
|
28
27
|
# Attempting to specialize a kernel jitted with a signature is illegal,
|
29
28
|
# even for the same types the kernel is already specialized for.
|
30
|
-
@cuda.jit(
|
29
|
+
@cuda.jit("void(float32[::1])")
|
31
30
|
def f(x):
|
32
31
|
pass
|
33
32
|
|
@@ -45,7 +44,7 @@ class TestDispatcherSpecialization(CUDATestCase):
|
|
45
44
|
|
46
45
|
def test_no_double_specialize_sig_diff_types(self):
|
47
46
|
# Attempting to specialize a kernel jitted with a signature is illegal.
|
48
|
-
@cuda.jit(
|
47
|
+
@cuda.jit("void(int32[::1])")
|
49
48
|
def f(x):
|
50
49
|
pass
|
51
50
|
|
@@ -132,13 +131,13 @@ class TestDispatcher(CUDATestCase):
|
|
132
131
|
self.assertEqual(r[0], add(12300000000, 456))
|
133
132
|
|
134
133
|
# Now force compilation of only a single specialization
|
135
|
-
c_add = cuda.jit(
|
134
|
+
c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
|
136
135
|
r = np.zeros(1, dtype=np.int32)
|
137
136
|
|
138
137
|
c_add[1, 1](r, 123, 456)
|
139
138
|
self.assertPreciseEqual(r[0], add(123, 456))
|
140
139
|
|
141
|
-
@skip_on_cudasim(
|
140
|
+
@skip_on_cudasim("Simulator ignores signature")
|
142
141
|
@unittest.expectedFailure
|
143
142
|
def test_coerce_input_types_unsafe(self):
|
144
143
|
# Implicit (unsafe) conversion of float to int, originally from
|
@@ -149,25 +148,24 @@ class TestDispatcher(CUDATestCase):
|
|
149
148
|
#
|
150
149
|
# This test is marked as xfail until future changes enable this
|
151
150
|
# behavior.
|
152
|
-
c_add = cuda.jit(
|
151
|
+
c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
|
153
152
|
r = np.zeros(1, dtype=np.int32)
|
154
153
|
|
155
154
|
c_add[1, 1](r, 12.3, 45.6)
|
156
155
|
self.assertPreciseEqual(r[0], add(12, 45))
|
157
156
|
|
158
|
-
@skip_on_cudasim(
|
157
|
+
@skip_on_cudasim("Simulator ignores signature")
|
159
158
|
def test_coerce_input_types_unsafe_complex(self):
|
160
159
|
# Implicit conversion of complex to int disallowed
|
161
|
-
c_add = cuda.jit(
|
160
|
+
c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
|
162
161
|
r = np.zeros(1, dtype=np.int32)
|
163
162
|
|
164
163
|
with self.assertRaises(TypeError):
|
165
164
|
c_add[1, 1](r, 12.3, 45.6j)
|
166
165
|
|
167
|
-
@skip_on_cudasim(
|
166
|
+
@skip_on_cudasim("Simulator does not track overloads")
|
168
167
|
def test_ambiguous_new_version(self):
|
169
|
-
"""Test compiling new version in an ambiguous case
|
170
|
-
"""
|
168
|
+
"""Test compiling new version in an ambiguous case"""
|
171
169
|
c_add = cuda.jit(add_kernel)
|
172
170
|
|
173
171
|
r = np.zeros(1, dtype=np.float64)
|
@@ -190,8 +188,9 @@ class TestDispatcher(CUDATestCase):
|
|
190
188
|
# to (float, int) or (int, float) with equal weight.
|
191
189
|
c_add[1, 1](r, 1, 1)
|
192
190
|
self.assertAlmostEqual(r[0], INT + INT)
|
193
|
-
self.assertEqual(
|
194
|
-
|
191
|
+
self.assertEqual(
|
192
|
+
len(c_add.overloads), 4, "didn't compile a new version"
|
193
|
+
)
|
195
194
|
|
196
195
|
@skip_on_cudasim("Simulator doesn't support concurrent kernels")
|
197
196
|
def test_lock(self):
|
@@ -245,8 +244,10 @@ class TestDispatcher(CUDATestCase):
|
|
245
244
|
|
246
245
|
def test_explicit_signatures_strings(self):
|
247
246
|
# Check with a list of strings for signatures
|
248
|
-
sigs = [
|
249
|
-
|
247
|
+
sigs = [
|
248
|
+
"(int64[::1], int64, int64)",
|
249
|
+
"(float64[::1], float64, float64)",
|
250
|
+
]
|
250
251
|
self._test_explicit_signatures(sigs)
|
251
252
|
|
252
253
|
def test_explicit_signatures_tuples(self):
|
@@ -256,26 +257,31 @@ class TestDispatcher(CUDATestCase):
|
|
256
257
|
|
257
258
|
def test_explicit_signatures_signatures(self):
|
258
259
|
# Check with a list of Signature objects for signatures
|
259
|
-
sigs = [
|
260
|
-
|
260
|
+
sigs = [
|
261
|
+
void(int64[::1], int64, int64),
|
262
|
+
void(float64[::1], float64, float64),
|
263
|
+
]
|
261
264
|
self._test_explicit_signatures(sigs)
|
262
265
|
|
263
266
|
def test_explicit_signatures_mixed(self):
|
264
267
|
# Check when we mix types of signature objects in a list of signatures
|
265
268
|
|
266
269
|
# Tuple and string
|
267
|
-
sigs = [(int64[::1], int64, int64),
|
268
|
-
"(float64[::1], float64, float64)"]
|
270
|
+
sigs = [(int64[::1], int64, int64), "(float64[::1], float64, float64)"]
|
269
271
|
self._test_explicit_signatures(sigs)
|
270
272
|
|
271
273
|
# Tuple and Signature object
|
272
|
-
sigs = [
|
273
|
-
|
274
|
+
sigs = [
|
275
|
+
(int64[::1], int64, int64),
|
276
|
+
void(float64[::1], float64, float64),
|
277
|
+
]
|
274
278
|
self._test_explicit_signatures(sigs)
|
275
279
|
|
276
280
|
# Signature object and string
|
277
|
-
sigs = [
|
278
|
-
|
281
|
+
sigs = [
|
282
|
+
void(int64[::1], int64, int64),
|
283
|
+
"(float64[::1], float64, float64)",
|
284
|
+
]
|
279
285
|
self._test_explicit_signatures(sigs)
|
280
286
|
|
281
287
|
def test_explicit_signatures_same_type_class(self):
|
@@ -284,8 +290,10 @@ class TestDispatcher(CUDATestCase):
|
|
284
290
|
# that dispatch is differentiated on the types of x and y only, to
|
285
291
|
# closely preserve the intent of the original test from
|
286
292
|
# numba.tests.test_dispatcher)
|
287
|
-
sigs = [
|
288
|
-
|
293
|
+
sigs = [
|
294
|
+
"(float64[::1], float32, float32)",
|
295
|
+
"(float64[::1], float64, float64)",
|
296
|
+
]
|
289
297
|
f = cuda.jit(sigs)(add_kernel)
|
290
298
|
|
291
299
|
r = np.zeros(1, dtype=np.float64)
|
@@ -296,13 +304,17 @@ class TestDispatcher(CUDATestCase):
|
|
296
304
|
f[1, 1](r, 1, 2**-25)
|
297
305
|
self.assertPreciseEqual(r[0], 1.0000000298023224)
|
298
306
|
|
299
|
-
@skip_on_cudasim(
|
307
|
+
@skip_on_cudasim("No overload resolution in the simulator")
|
300
308
|
def test_explicit_signatures_ambiguous_resolution(self):
|
301
309
|
# Fail to resolve ambiguity between the two best overloads
|
302
310
|
# (Also deliberate float64[::1] for the first argument in all cases)
|
303
|
-
f = cuda.jit(
|
304
|
-
|
305
|
-
|
311
|
+
f = cuda.jit(
|
312
|
+
[
|
313
|
+
"(float64[::1], float32, float64)",
|
314
|
+
"(float64[::1], float64, float32)",
|
315
|
+
"(float64[::1], int64, int64)",
|
316
|
+
]
|
317
|
+
)(add_kernel)
|
306
318
|
with self.assertRaises(TypeError) as cm:
|
307
319
|
r = np.zeros(1, dtype=np.float64)
|
308
320
|
f[1, 1](r, 1.0, 2.0)
|
@@ -317,12 +329,12 @@ class TestDispatcher(CUDATestCase):
|
|
317
329
|
r"\(Array\(float64, 1, 'C', False, aligned=True\), float32,"
|
318
330
|
r" float64\) -> none\n"
|
319
331
|
r"\(Array\(float64, 1, 'C', False, aligned=True\), float64,"
|
320
|
-
r" float32\) -> none"
|
332
|
+
r" float32\) -> none",
|
321
333
|
)
|
322
334
|
# The integer signature is not part of the best matches
|
323
335
|
self.assertNotIn("int64", str(cm.exception))
|
324
336
|
|
325
|
-
@skip_on_cudasim(
|
337
|
+
@skip_on_cudasim("Simulator does not use _prepare_args")
|
326
338
|
@unittest.expectedFailure
|
327
339
|
def test_explicit_signatures_unsafe(self):
|
328
340
|
# These tests are from test_explicit_signatures, but have to be xfail
|
@@ -336,8 +348,10 @@ class TestDispatcher(CUDATestCase):
|
|
336
348
|
self.assertPreciseEqual(r[0], 3)
|
337
349
|
self.assertEqual(len(f.overloads), 1, f.overloads)
|
338
350
|
|
339
|
-
sigs = [
|
340
|
-
|
351
|
+
sigs = [
|
352
|
+
"(int64[::1], int64, int64)",
|
353
|
+
"(float64[::1], float64, float64)",
|
354
|
+
]
|
341
355
|
f = cuda.jit(sigs)(add_kernel)
|
342
356
|
r = np.zeros(1, dtype=np.float64)
|
343
357
|
# Approximate match (int32 -> float64 is a safe conversion)
|
@@ -414,7 +428,7 @@ class TestDispatcher(CUDATestCase):
|
|
414
428
|
f[1, 1](r, 1.5, 2.5)
|
415
429
|
self.assertPreciseEqual(r[0], 4.0)
|
416
430
|
|
417
|
-
@skip_on_cudasim(
|
431
|
+
@skip_on_cudasim("CUDA Simulator does not force casting")
|
418
432
|
def test_explicit_signatures_device_unsafe(self):
|
419
433
|
# These tests are from test_explicit_signatures. The device function
|
420
434
|
# variant of these tests can succeed on CUDA because the compilation
|
@@ -489,17 +503,15 @@ class TestDispatcherKernelProperties(CUDATestCase):
|
|
489
503
|
# provides the same values as getting the registers per thread for
|
490
504
|
# individual signatures.
|
491
505
|
regs_per_thread_all = pi_sin_array.get_regs_per_thread()
|
492
|
-
self.assertEqual(regs_per_thread_all[sig_f32.args],
|
493
|
-
|
494
|
-
self.assertEqual(regs_per_thread_all[sig_f64.args],
|
495
|
-
regs_per_thread_f64)
|
506
|
+
self.assertEqual(regs_per_thread_all[sig_f32.args], regs_per_thread_f32)
|
507
|
+
self.assertEqual(regs_per_thread_all[sig_f64.args], regs_per_thread_f64)
|
496
508
|
|
497
509
|
if regs_per_thread_f32 == regs_per_thread_f64:
|
498
510
|
# If the register usage is the same for both variants, there may be
|
499
511
|
# a bug, but this may also be an artifact of the compiler / driver
|
500
512
|
# / device combination, so produce an informational message only.
|
501
|
-
print(
|
502
|
-
print(
|
513
|
+
print("f32 and f64 variant thread usages are equal.")
|
514
|
+
print("This may warrant some investigation. Devices:")
|
503
515
|
cuda.detect()
|
504
516
|
|
505
517
|
def test_get_regs_per_thread_specialized(self):
|
@@ -696,5 +708,5 @@ class TestDispatcherKernelProperties(CUDATestCase):
|
|
696
708
|
self.assertGreaterEqual(local_mem_per_thread, N * 4)
|
697
709
|
|
698
710
|
|
699
|
-
if __name__ ==
|
711
|
+
if __name__ == "__main__":
|
700
712
|
unittest.main()
|
@@ -6,24 +6,24 @@ import numpy as np
|
|
6
6
|
|
7
7
|
from numba import int16, int32
|
8
8
|
from numba import cuda, vectorize, njit
|
9
|
+
from numba.core import types
|
9
10
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
10
11
|
from numba.tests.enum_usecases import (
|
11
12
|
Color,
|
12
13
|
Shape,
|
13
14
|
Planet,
|
14
15
|
RequestError,
|
15
|
-
IntEnumWithNegatives
|
16
|
+
IntEnumWithNegatives,
|
16
17
|
)
|
17
18
|
|
18
19
|
|
19
20
|
class EnumTest(CUDATestCase):
|
20
|
-
|
21
21
|
pairs = [
|
22
22
|
(Color.red, Color.red),
|
23
23
|
(Color.red, Color.green),
|
24
24
|
(Planet.EARTH, Planet.EARTH),
|
25
25
|
(Planet.VENUS, Planet.MARS),
|
26
|
-
(Shape.circle, IntEnumWithNegatives.two)
|
26
|
+
(Shape.circle, IntEnumWithNegatives.two), # IntEnum, same value
|
27
27
|
]
|
28
28
|
|
29
29
|
def test_compare(self):
|
@@ -45,7 +45,7 @@ class EnumTest(CUDATestCase):
|
|
45
45
|
def f(out):
|
46
46
|
# Lookup of an enum member on its class
|
47
47
|
out[0] = Color.red == Color.green
|
48
|
-
out[1] = Color[
|
48
|
+
out[1] = Color["red"] == Color["green"]
|
49
49
|
|
50
50
|
cuda_f = cuda.jit(f)
|
51
51
|
got = np.zeros((2,), dtype=np.bool_)
|
@@ -106,16 +106,33 @@ class EnumTest(CUDATestCase):
|
|
106
106
|
def test_vectorize(self):
|
107
107
|
def f(x):
|
108
108
|
if x != RequestError.not_found:
|
109
|
-
return RequestError[
|
109
|
+
return RequestError["internal_error"]
|
110
110
|
else:
|
111
111
|
return RequestError.dummy
|
112
112
|
|
113
|
-
cuda_func = vectorize("int64(int64)", target=
|
113
|
+
cuda_func = vectorize("int64(int64)", target="cuda")(f)
|
114
114
|
arr = np.array([2, 404, 500, 404], dtype=np.int64)
|
115
115
|
expected = np.array([f(x) for x in arr], dtype=np.int64)
|
116
116
|
got = cuda_func(arr)
|
117
117
|
self.assertPreciseEqual(expected, got)
|
118
118
|
|
119
|
+
def test_int_enum_no_conversion(self):
|
120
|
+
# Ported from Numba PR #10047: "Fix IntEnumMember.can_convert_to() when
|
121
|
+
# no conversions found", https://github.com/numba/numba/pull/10047.
|
122
|
+
|
123
|
+
# The original test is intended to ensure that
|
124
|
+
# IntEnumMember.can_convert_to() handles the case when the typing
|
125
|
+
# context's can_convert() method returns None to signal no possible
|
126
|
+
# conversion. In Numba-CUDA, we had to patch the CUDA target context to
|
127
|
+
# work around this issue, because we cannot guarantee that the
|
128
|
+
# IntEnumMember method can be patched before instances are created.
|
129
|
+
ctx = cuda.descriptor.cuda_target.typing_context
|
130
|
+
|
131
|
+
int_enum_type = types.IntEnumMember(Shape, types.int64)
|
132
|
+
# Conversion of an int enum member to a 1D array would be invalid
|
133
|
+
invalid_toty = types.int64[::1]
|
134
|
+
self.assertIsNone(ctx.can_convert(int_enum_type, invalid_toty))
|
135
|
+
|
119
136
|
|
120
|
-
if __name__ ==
|
137
|
+
if __name__ == "__main__":
|
121
138
|
unittest.main()
|
@@ -17,34 +17,49 @@ class TestJitErrors(CUDATestCase):
|
|
17
17
|
|
18
18
|
with self.assertRaises(ValueError) as raises:
|
19
19
|
kernfunc[(1, 2, 3, 4), (5, 6)]
|
20
|
-
self.assertIn(
|
21
|
-
|
22
|
-
|
20
|
+
self.assertIn(
|
21
|
+
"griddim must be a sequence of 1, 2 or 3 integers, "
|
22
|
+
"got [1, 2, 3, 4]",
|
23
|
+
str(raises.exception),
|
24
|
+
)
|
23
25
|
|
24
26
|
with self.assertRaises(ValueError) as raises:
|
25
|
-
kernfunc[
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
kernfunc[
|
28
|
+
(
|
29
|
+
1,
|
30
|
+
2,
|
31
|
+
),
|
32
|
+
(3, 4, 5, 6),
|
33
|
+
]
|
34
|
+
self.assertIn(
|
35
|
+
"blockdim must be a sequence of 1, 2 or 3 integers, "
|
36
|
+
"got [3, 4, 5, 6]",
|
37
|
+
str(raises.exception),
|
38
|
+
)
|
29
39
|
|
30
40
|
def test_non_integral_dims(self):
|
31
41
|
kernfunc = cuda.jit(noop)
|
32
42
|
|
33
43
|
with self.assertRaises(TypeError) as raises:
|
34
44
|
kernfunc[2.0, 3]
|
35
|
-
self.assertIn(
|
36
|
-
|
45
|
+
self.assertIn(
|
46
|
+
"griddim must be a sequence of integers, got [2.0]",
|
47
|
+
str(raises.exception),
|
48
|
+
)
|
37
49
|
|
38
50
|
with self.assertRaises(TypeError) as raises:
|
39
51
|
kernfunc[2, 3.0]
|
40
|
-
self.assertIn(
|
41
|
-
|
52
|
+
self.assertIn(
|
53
|
+
"blockdim must be a sequence of integers, got [3.0]",
|
54
|
+
str(raises.exception),
|
55
|
+
)
|
42
56
|
|
43
57
|
def _test_unconfigured(self, kernfunc):
|
44
58
|
with self.assertRaises(ValueError) as raises:
|
45
59
|
kernfunc(0)
|
46
|
-
self.assertIn(
|
47
|
-
|
60
|
+
self.assertIn(
|
61
|
+
"launch configuration was not specified", str(raises.exception)
|
62
|
+
)
|
48
63
|
|
49
64
|
def test_unconfigured_typed_cudakernel(self):
|
50
65
|
kernfunc = cuda.jit("void(int32)")(noop)
|
@@ -54,7 +69,7 @@ class TestJitErrors(CUDATestCase):
|
|
54
69
|
kernfunc = cuda.jit(noop)
|
55
70
|
self._test_unconfigured(kernfunc)
|
56
71
|
|
57
|
-
@skip_on_cudasim(
|
72
|
+
@skip_on_cudasim("TypingError does not occur on simulator")
|
58
73
|
def test_typing_error(self):
|
59
74
|
# see #5860, this is present to catch changes to error reporting
|
60
75
|
# accidentally breaking the CUDA target
|
@@ -75,5 +90,5 @@ class TestJitErrors(CUDATestCase):
|
|
75
90
|
self.assertIn("NameError: name 'floor' is not defined", excstr)
|
76
91
|
|
77
92
|
|
78
|
-
if __name__ ==
|
93
|
+
if __name__ == "__main__":
|
79
94
|
unittest.main()
|
@@ -83,20 +83,19 @@ class TestException(CUDATestCase):
|
|
83
83
|
x[i] += x[i] // y[i]
|
84
84
|
|
85
85
|
n = 32
|
86
|
-
got_x = 1. / (np.arange(n) + 0.01)
|
87
|
-
got_y = 1. / (np.arange(n) + 0.01)
|
86
|
+
got_x = 1.0 / (np.arange(n) + 0.01)
|
87
|
+
got_y = 1.0 / (np.arange(n) + 0.01)
|
88
88
|
problematic[1, n](got_x, got_y)
|
89
89
|
|
90
|
-
expect_x = 1. / (np.arange(n) + 0.01)
|
91
|
-
expect_y = 1. / (np.arange(n) + 0.01)
|
90
|
+
expect_x = 1.0 / (np.arange(n) + 0.01)
|
91
|
+
expect_y = 1.0 / (np.arange(n) + 0.01)
|
92
92
|
oracle[1, n](expect_x, expect_y)
|
93
93
|
|
94
94
|
np.testing.assert_almost_equal(expect_x, got_x)
|
95
95
|
np.testing.assert_almost_equal(expect_y, got_y)
|
96
96
|
|
97
97
|
def test_raise_causing_warp_diverge(self):
|
98
|
-
"""Test case for issue #2655.
|
99
|
-
"""
|
98
|
+
"""Test case for issue #2655."""
|
100
99
|
self.case_raise_causing_warp_diverge(with_debug_mode=False)
|
101
100
|
|
102
101
|
# The following two cases relate to Issue #7806: Division by zero stops the
|
@@ -117,8 +116,8 @@ class TestException(CUDATestCase):
|
|
117
116
|
|
118
117
|
f[1, 1](r, x, y)
|
119
118
|
|
120
|
-
self.assertTrue(np.isinf(r[0]),
|
121
|
-
self.assertEqual(r[1], y[0],
|
119
|
+
self.assertTrue(np.isinf(r[0]), "Expected inf from div by zero")
|
120
|
+
self.assertEqual(r[1], y[0], "Expected execution to continue")
|
122
121
|
|
123
122
|
def test_zero_division_error_in_debug(self):
|
124
123
|
# When debug is True:
|
@@ -146,15 +145,15 @@ class TestException(CUDATestCase):
|
|
146
145
|
with self.assertRaises(exc):
|
147
146
|
f[1, 1](r, x, y)
|
148
147
|
|
149
|
-
self.assertEqual(r[0], 0,
|
150
|
-
self.assertEqual(r[1], 0,
|
148
|
+
self.assertEqual(r[0], 0, "Expected result to be left unset")
|
149
|
+
self.assertEqual(r[1], 0, "Expected execution to stop")
|
151
150
|
|
152
151
|
@xfail_unless_cudasim
|
153
152
|
def test_raise_in_device_function(self):
|
154
153
|
# This is an expected failure because reporting of exceptions raised in
|
155
154
|
# device functions does not work correctly - see Issue #8036:
|
156
155
|
# https://github.com/numba/numba/issues/8036
|
157
|
-
msg =
|
156
|
+
msg = "Device Function Error"
|
158
157
|
|
159
158
|
@cuda.jit(device=True)
|
160
159
|
def f():
|
@@ -170,5 +169,5 @@ class TestException(CUDATestCase):
|
|
170
169
|
self.assertIn(msg, str(raises.exception))
|
171
170
|
|
172
171
|
|
173
|
-
if __name__ ==
|
172
|
+
if __name__ == "__main__":
|
174
173
|
unittest.main()
|
@@ -8,12 +8,13 @@ class Interval:
|
|
8
8
|
"""
|
9
9
|
A half-open interval on the real number line.
|
10
10
|
"""
|
11
|
+
|
11
12
|
def __init__(self, lo, hi):
|
12
13
|
self.lo = lo
|
13
14
|
self.hi = hi
|
14
15
|
|
15
16
|
def __repr__(self):
|
16
|
-
return
|
17
|
+
return "Interval(%f, %f)" % (self.lo, self.hi)
|
17
18
|
|
18
19
|
@property
|
19
20
|
def width(self):
|
@@ -32,16 +33,23 @@ def sum_intervals(i, j):
|
|
32
33
|
|
33
34
|
if not config.ENABLE_CUDASIM:
|
34
35
|
from numba.core import cgutils
|
35
|
-
from numba.core.extending import (
|
36
|
-
|
37
|
-
|
36
|
+
from numba.core.extending import (
|
37
|
+
lower_builtin,
|
38
|
+
models,
|
39
|
+
type_callable,
|
40
|
+
typeof_impl,
|
41
|
+
)
|
38
42
|
from numba.core.typing.templates import AttributeTemplate
|
39
43
|
from numba.cuda.cudadecl import registry as cuda_registry
|
40
44
|
from numba.cuda.cudaimpl import lower_attr as cuda_lower_attr
|
45
|
+
from numba.cuda.extending import (
|
46
|
+
register_model,
|
47
|
+
make_attribute_wrapper,
|
48
|
+
)
|
41
49
|
|
42
50
|
class IntervalType(types.Type):
|
43
51
|
def __init__(self):
|
44
|
-
super().__init__(name=
|
52
|
+
super().__init__(name="Interval")
|
45
53
|
|
46
54
|
interval_type = IntervalType()
|
47
55
|
|
@@ -54,19 +62,20 @@ if not config.ENABLE_CUDASIM:
|
|
54
62
|
def typer(lo, hi):
|
55
63
|
if isinstance(lo, types.Float) and isinstance(hi, types.Float):
|
56
64
|
return interval_type
|
65
|
+
|
57
66
|
return typer
|
58
67
|
|
59
68
|
@register_model(IntervalType)
|
60
69
|
class IntervalModel(models.StructModel):
|
61
70
|
def __init__(self, dmm, fe_type):
|
62
71
|
members = [
|
63
|
-
(
|
64
|
-
(
|
72
|
+
("lo", types.float64),
|
73
|
+
("hi", types.float64),
|
65
74
|
]
|
66
75
|
models.StructModel.__init__(self, dmm, fe_type, members)
|
67
76
|
|
68
|
-
make_attribute_wrapper(IntervalType,
|
69
|
-
make_attribute_wrapper(IntervalType,
|
77
|
+
make_attribute_wrapper(IntervalType, "lo", "lo")
|
78
|
+
make_attribute_wrapper(IntervalType, "hi", "hi")
|
70
79
|
|
71
80
|
@lower_builtin(Interval, types.Float, types.Float)
|
72
81
|
def impl_interval(context, builder, sig, args):
|
@@ -84,14 +93,14 @@ if not config.ENABLE_CUDASIM:
|
|
84
93
|
def resolve_width(self, mod):
|
85
94
|
return types.float64
|
86
95
|
|
87
|
-
@cuda_lower_attr(IntervalType,
|
96
|
+
@cuda_lower_attr(IntervalType, "width")
|
88
97
|
def cuda_Interval_width(context, builder, sig, arg):
|
89
98
|
lo = builder.extract_value(arg, 0)
|
90
99
|
hi = builder.extract_value(arg, 1)
|
91
100
|
return builder.fsub(hi, lo)
|
92
101
|
|
93
102
|
|
94
|
-
@skip_on_cudasim(
|
103
|
+
@skip_on_cudasim("Extensions not supported in the simulator")
|
95
104
|
class TestExtending(CUDATestCase):
|
96
105
|
def test_attributes(self):
|
97
106
|
@cuda.jit
|
@@ -151,5 +160,5 @@ class TestExtending(CUDATestCase):
|
|
151
160
|
np.testing.assert_allclose(r, expected)
|
152
161
|
|
153
162
|
|
154
|
-
if __name__ ==
|
163
|
+
if __name__ == "__main__":
|
155
164
|
unittest.main()
|