numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
from numba.tests.support import
|
1
|
+
from numba.tests.support import override_config, captured_stdout
|
2
2
|
from numba.cuda.testing import skip_on_cudasim
|
3
3
|
from numba import cuda
|
4
4
|
from numba.core import types
|
@@ -8,7 +8,7 @@ import re
|
|
8
8
|
import unittest
|
9
9
|
|
10
10
|
|
11
|
-
@skip_on_cudasim(
|
11
|
+
@skip_on_cudasim("Simulator does not produce debug dumps")
|
12
12
|
class TestCudaDebugInfo(CUDATestCase):
|
13
13
|
"""
|
14
14
|
These tests only checks the compiled PTX for debuginfo section
|
@@ -49,7 +49,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
49
49
|
self._check(foo, sig=(types.int32[:],), expect=True)
|
50
50
|
|
51
51
|
def test_environment_override(self):
|
52
|
-
with override_config(
|
52
|
+
with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
|
53
53
|
# Using default value
|
54
54
|
@cuda.jit(opt=False)
|
55
55
|
def foo(x):
|
@@ -86,7 +86,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
86
86
|
|
87
87
|
llvm_ir = f.inspect_llvm(sig)
|
88
88
|
# A varible name starting with "bool" in the debug metadata
|
89
|
-
pat = r
|
89
|
+
pat = r"!DILocalVariable\(.*name:\s+\"bool"
|
90
90
|
match = re.compile(pat).search(llvm_ir)
|
91
91
|
self.assertIsNone(match, msg=llvm_ir)
|
92
92
|
|
@@ -106,7 +106,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
106
106
|
mdnode_id = match.group(1)
|
107
107
|
|
108
108
|
# verify the DIBasicType has correct encoding attribute DW_ATE_boolean
|
109
|
-
pat = rf
|
109
|
+
pat = rf"!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean"
|
110
110
|
match = re.compile(pat).search(llvm_ir)
|
111
111
|
self.assertIsNotNone(match, msg=llvm_ir)
|
112
112
|
|
@@ -133,14 +133,17 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
133
133
|
|
134
134
|
llvm_ir = f.inspect_llvm(sig)
|
135
135
|
|
136
|
-
defines = [
|
137
|
-
|
136
|
+
defines = [
|
137
|
+
line
|
138
|
+
for line in llvm_ir.splitlines()
|
139
|
+
if 'define void @"_ZN6cudapy' in line
|
140
|
+
]
|
138
141
|
|
139
142
|
# Make sure we only found one definition
|
140
143
|
self.assertEqual(len(defines), 1)
|
141
144
|
|
142
145
|
wrapper_define = defines[0]
|
143
|
-
self.assertIn(
|
146
|
+
self.assertIn("!dbg", wrapper_define)
|
144
147
|
|
145
148
|
def test_debug_function_calls_internal_impl(self):
|
146
149
|
# Calling a function in a module generated from an implementation
|
@@ -198,16 +201,16 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
198
201
|
debug_opts = itertools.product(*[(True, False)] * 3)
|
199
202
|
|
200
203
|
for kernel_debug, f1_debug, f2_debug in debug_opts:
|
201
|
-
with self.subTest(
|
202
|
-
|
203
|
-
|
204
|
-
self._test_chained_device_function(
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
def _test_chained_device_function_two_calls(
|
209
|
-
|
210
|
-
|
204
|
+
with self.subTest(
|
205
|
+
kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
|
206
|
+
):
|
207
|
+
self._test_chained_device_function(
|
208
|
+
kernel_debug, f1_debug, f2_debug
|
209
|
+
)
|
210
|
+
|
211
|
+
def _test_chained_device_function_two_calls(
|
212
|
+
self, kernel_debug, f1_debug, f2_debug
|
213
|
+
):
|
211
214
|
@cuda.jit(device=True, debug=f2_debug, opt=False)
|
212
215
|
def f2(x):
|
213
216
|
return x + 1
|
@@ -232,12 +235,12 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
232
235
|
debug_opts = itertools.product(*[(True, False)] * 3)
|
233
236
|
|
234
237
|
for kernel_debug, f1_debug, f2_debug in debug_opts:
|
235
|
-
with self.subTest(
|
236
|
-
|
237
|
-
|
238
|
-
self._test_chained_device_function_two_calls(
|
239
|
-
|
240
|
-
|
238
|
+
with self.subTest(
|
239
|
+
kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
|
240
|
+
):
|
241
|
+
self._test_chained_device_function_two_calls(
|
242
|
+
kernel_debug, f1_debug, f2_debug
|
243
|
+
)
|
241
244
|
|
242
245
|
def test_chained_device_three_functions(self):
|
243
246
|
# Like test_chained_device_function, but with enough functions (three)
|
@@ -278,13 +281,13 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
278
281
|
llvm_ir = f.inspect_llvm(sig)
|
279
282
|
|
280
283
|
# extract the metadata node id from `types` field of DISubroutineType
|
281
|
-
pat = r
|
284
|
+
pat = r"!DISubroutineType\(types:\s+!(\d+)\)"
|
282
285
|
match = re.compile(pat).search(llvm_ir)
|
283
286
|
self.assertIsNotNone(match, msg=llvm_ir)
|
284
287
|
mdnode_id = match.group(1)
|
285
288
|
|
286
289
|
# extract the metadata node ids from the flexible node of types
|
287
|
-
pat = rf
|
290
|
+
pat = rf"!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}"
|
288
291
|
match = re.compile(pat).search(llvm_ir)
|
289
292
|
self.assertIsNotNone(match, msg=llvm_ir)
|
290
293
|
mdnode_id1 = match.group(1)
|
@@ -303,10 +306,10 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
303
306
|
|
304
307
|
def test_kernel_args_types_dump(self):
|
305
308
|
# see issue#135
|
306
|
-
with override_config(
|
309
|
+
with override_config("DUMP_LLVM", 1):
|
307
310
|
with captured_stdout():
|
308
311
|
self._test_kernel_args_types()
|
309
312
|
|
310
313
|
|
311
|
-
if __name__ ==
|
314
|
+
if __name__ == "__main__":
|
312
315
|
unittest.main()
|
@@ -3,8 +3,13 @@ import cffi
|
|
3
3
|
|
4
4
|
import numpy as np
|
5
5
|
|
6
|
-
from numba.cuda.testing import (
|
7
|
-
|
6
|
+
from numba.cuda.testing import (
|
7
|
+
skip_if_curand_kernel_missing,
|
8
|
+
skip_on_cudasim,
|
9
|
+
test_data_dir,
|
10
|
+
unittest,
|
11
|
+
CUDATestCase,
|
12
|
+
)
|
8
13
|
from numba import cuda, jit, float32, int32, types
|
9
14
|
from numba.core.errors import TypingError
|
10
15
|
from numba.tests.support import skip_unless_cffi
|
@@ -12,9 +17,7 @@ from types import ModuleType
|
|
12
17
|
|
13
18
|
|
14
19
|
class TestDeviceFunc(CUDATestCase):
|
15
|
-
|
16
20
|
def test_use_add2f(self):
|
17
|
-
|
18
21
|
@cuda.jit("float32(float32, float32)", device=True)
|
19
22
|
def add2f(a, b):
|
20
23
|
return a + b
|
@@ -33,7 +36,6 @@ class TestDeviceFunc(CUDATestCase):
|
|
33
36
|
self.assertTrue(np.all(ary == exp), (ary, exp))
|
34
37
|
|
35
38
|
def test_indirect_add2f(self):
|
36
|
-
|
37
39
|
@cuda.jit("float32(float32, float32)", device=True)
|
38
40
|
def add2f(a, b):
|
39
41
|
return a + b
|
@@ -74,12 +76,12 @@ class TestDeviceFunc(CUDATestCase):
|
|
74
76
|
|
75
77
|
self._check_cpu_dispatcher(add)
|
76
78
|
|
77
|
-
@skip_on_cudasim(
|
79
|
+
@skip_on_cudasim("not supported in cudasim")
|
78
80
|
def test_cpu_dispatcher_invalid(self):
|
79
81
|
# Test invalid usage
|
80
82
|
# Explicit signature disables compilation, which also disable
|
81
83
|
# compiling on CUDA.
|
82
|
-
@jit(
|
84
|
+
@jit("(i4, i4)")
|
83
85
|
def add(a, b):
|
84
86
|
return a + b
|
85
87
|
|
@@ -95,7 +97,7 @@ class TestDeviceFunc(CUDATestCase):
|
|
95
97
|
def add(a, b):
|
96
98
|
return a + b
|
97
99
|
|
98
|
-
mymod = ModuleType(name=
|
100
|
+
mymod = ModuleType(name="mymod")
|
99
101
|
mymod.add = add
|
100
102
|
del add
|
101
103
|
|
@@ -109,7 +111,7 @@ class TestDeviceFunc(CUDATestCase):
|
|
109
111
|
add_kernel[1, ary.size](ary)
|
110
112
|
np.testing.assert_equal(expect, ary)
|
111
113
|
|
112
|
-
@skip_on_cudasim(
|
114
|
+
@skip_on_cudasim("not supported in cudasim")
|
113
115
|
def test_inspect_llvm(self):
|
114
116
|
@cuda.jit(device=True)
|
115
117
|
def foo(x, y):
|
@@ -120,13 +122,13 @@ class TestDeviceFunc(CUDATestCase):
|
|
120
122
|
|
121
123
|
fname = cres.fndesc.mangled_name
|
122
124
|
# Verify that the function name has "foo" in it as in the python name
|
123
|
-
self.assertIn(
|
125
|
+
self.assertIn("foo", fname)
|
124
126
|
|
125
127
|
llvm = foo.inspect_llvm(args)
|
126
128
|
# Check that the compiled function name is in the LLVM.
|
127
129
|
self.assertIn(fname, llvm)
|
128
130
|
|
129
|
-
@skip_on_cudasim(
|
131
|
+
@skip_on_cudasim("not supported in cudasim")
|
130
132
|
def test_inspect_asm(self):
|
131
133
|
@cuda.jit(device=True)
|
132
134
|
def foo(x, y):
|
@@ -137,13 +139,13 @@ class TestDeviceFunc(CUDATestCase):
|
|
137
139
|
|
138
140
|
fname = cres.fndesc.mangled_name
|
139
141
|
# Verify that the function name has "foo" in it as in the python name
|
140
|
-
self.assertIn(
|
142
|
+
self.assertIn("foo", fname)
|
141
143
|
|
142
144
|
ptx = foo.inspect_asm(args)
|
143
145
|
# Check that the compiled function name is in the PTX
|
144
146
|
self.assertIn(fname, ptx)
|
145
147
|
|
146
|
-
@skip_on_cudasim(
|
148
|
+
@skip_on_cudasim("not supported in cudasim")
|
147
149
|
def test_inspect_sass_disallowed(self):
|
148
150
|
@cuda.jit(device=True)
|
149
151
|
def foo(x, y):
|
@@ -152,10 +154,11 @@ class TestDeviceFunc(CUDATestCase):
|
|
152
154
|
with self.assertRaises(RuntimeError) as raises:
|
153
155
|
foo.inspect_sass((int32, int32))
|
154
156
|
|
155
|
-
self.assertIn(
|
156
|
-
|
157
|
+
self.assertIn(
|
158
|
+
"Cannot inspect SASS of a device function", str(raises.exception)
|
159
|
+
)
|
157
160
|
|
158
|
-
@skip_on_cudasim(
|
161
|
+
@skip_on_cudasim("cudasim will allow calling any function")
|
159
162
|
def test_device_func_as_kernel_disallowed(self):
|
160
163
|
@cuda.jit(device=True)
|
161
164
|
def f():
|
@@ -164,10 +167,12 @@ class TestDeviceFunc(CUDATestCase):
|
|
164
167
|
with self.assertRaises(RuntimeError) as raises:
|
165
168
|
f[1, 1]()
|
166
169
|
|
167
|
-
self.assertIn(
|
168
|
-
|
170
|
+
self.assertIn(
|
171
|
+
"Cannot compile a device function as a kernel",
|
172
|
+
str(raises.exception),
|
173
|
+
)
|
169
174
|
|
170
|
-
@skip_on_cudasim(
|
175
|
+
@skip_on_cudasim("cudasim ignores casting by jit decorator signature")
|
171
176
|
def test_device_casting(self):
|
172
177
|
# Ensure that casts to the correct type are forced when calling a
|
173
178
|
# device function with a signature. This test ensures that:
|
@@ -176,20 +181,23 @@ class TestDeviceFunc(CUDATestCase):
|
|
176
181
|
# shouldn't
|
177
182
|
# - We insert a cast when calling rgba, as opposed to failing to type.
|
178
183
|
|
179
|
-
@cuda.jit(
|
184
|
+
@cuda.jit("int32(int32, int32, int32, int32)", device=True)
|
180
185
|
def rgba(r, g, b, a):
|
181
|
-
return (
|
182
|
-
|
183
|
-
|
184
|
-
|
186
|
+
return (
|
187
|
+
((r & 0xFF) << 16)
|
188
|
+
| ((g & 0xFF) << 8)
|
189
|
+
| ((b & 0xFF) << 0)
|
190
|
+
| ((a & 0xFF) << 24)
|
191
|
+
)
|
185
192
|
|
186
193
|
@cuda.jit
|
187
194
|
def rgba_caller(x, channels):
|
188
195
|
x[0] = rgba(channels[0], channels[1], channels[2], channels[3])
|
189
196
|
|
190
197
|
x = cuda.device_array(1, dtype=np.int32)
|
191
|
-
channels = cuda.to_device(
|
192
|
-
|
198
|
+
channels = cuda.to_device(
|
199
|
+
np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
|
200
|
+
)
|
193
201
|
|
194
202
|
rgba_caller[1, 1](x, channels)
|
195
203
|
|
@@ -259,32 +267,31 @@ int random_number(unsigned int *out, unsigned long long seed)
|
|
259
267
|
}""")
|
260
268
|
|
261
269
|
|
262
|
-
@skip_on_cudasim(
|
270
|
+
@skip_on_cudasim("External functions unsupported in the simulator")
|
263
271
|
class TestDeclareDevice(CUDATestCase):
|
264
|
-
|
265
272
|
def check_api(self, decl):
|
266
|
-
self.assertEqual(decl.name,
|
273
|
+
self.assertEqual(decl.name, "f1")
|
267
274
|
self.assertEqual(decl.sig.args, (float32[:],))
|
268
275
|
self.assertEqual(decl.sig.return_type, int32)
|
269
276
|
|
270
277
|
def test_declare_device_signature(self):
|
271
|
-
f1 = cuda.declare_device(
|
278
|
+
f1 = cuda.declare_device("f1", int32(float32[:]))
|
272
279
|
self.check_api(f1)
|
273
280
|
|
274
281
|
def test_declare_device_string(self):
|
275
|
-
f1 = cuda.declare_device(
|
282
|
+
f1 = cuda.declare_device("f1", "int32(float32[:])")
|
276
283
|
self.check_api(f1)
|
277
284
|
|
278
285
|
def test_bad_declare_device_tuple(self):
|
279
|
-
with self.assertRaisesRegex(TypeError,
|
280
|
-
cuda.declare_device(
|
286
|
+
with self.assertRaisesRegex(TypeError, "Return type"):
|
287
|
+
cuda.declare_device("f1", (float32[:],))
|
281
288
|
|
282
289
|
def test_bad_declare_device_string(self):
|
283
|
-
with self.assertRaisesRegex(TypeError,
|
284
|
-
cuda.declare_device(
|
290
|
+
with self.assertRaisesRegex(TypeError, "Return type"):
|
291
|
+
cuda.declare_device("f1", "(float32[:],)")
|
285
292
|
|
286
293
|
def test_link_cu_source(self):
|
287
|
-
times2 = cuda.declare_device(
|
294
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
288
295
|
|
289
296
|
@cuda.jit
|
290
297
|
def kernel(r, x):
|
@@ -301,7 +308,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
301
308
|
|
302
309
|
def _test_link_multiple_sources(self, link_type):
|
303
310
|
link = link_type([times2_cu, times4_cu])
|
304
|
-
times4 = cuda.declare_device(
|
311
|
+
times4 = cuda.declare_device("times4", "int32(int32)", link=link)
|
305
312
|
|
306
313
|
@cuda.jit
|
307
314
|
def kernel(r, x):
|
@@ -360,7 +367,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
360
367
|
np.testing.assert_equal(x[0], 323845807)
|
361
368
|
|
362
369
|
def test_declared_in_called_function(self):
|
363
|
-
times2 = cuda.declare_device(
|
370
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
364
371
|
|
365
372
|
@cuda.jit
|
366
373
|
def device_func(x):
|
@@ -380,7 +387,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
380
387
|
np.testing.assert_equal(r, x * 2)
|
381
388
|
|
382
389
|
def test_declared_in_called_function_twice(self):
|
383
|
-
times2 = cuda.declare_device(
|
390
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
384
391
|
|
385
392
|
@cuda.jit
|
386
393
|
def device_func_1(x):
|
@@ -404,7 +411,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
404
411
|
np.testing.assert_equal(r, x * 2)
|
405
412
|
|
406
413
|
def test_declared_in_called_function_two_calls(self):
|
407
|
-
times2 = cuda.declare_device(
|
414
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
408
415
|
|
409
416
|
@cuda.jit
|
410
417
|
def device_func(x):
|
@@ -424,7 +431,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
424
431
|
np.testing.assert_equal(r, x * 6)
|
425
432
|
|
426
433
|
def test_call_declared_function_twice(self):
|
427
|
-
times2 = cuda.declare_device(
|
434
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
428
435
|
|
429
436
|
@cuda.jit
|
430
437
|
def kernel(r, x):
|
@@ -440,7 +447,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
440
447
|
np.testing.assert_equal(r, x * 6)
|
441
448
|
|
442
449
|
def test_declared_in_called_function_and_parent(self):
|
443
|
-
times2 = cuda.declare_device(
|
450
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
444
451
|
|
445
452
|
@cuda.jit
|
446
453
|
def device_func(x):
|
@@ -460,8 +467,8 @@ class TestDeclareDevice(CUDATestCase):
|
|
460
467
|
np.testing.assert_equal(r, x * 4)
|
461
468
|
|
462
469
|
def test_call_two_different_declared_functions(self):
|
463
|
-
times2 = cuda.declare_device(
|
464
|
-
times3 = cuda.declare_device(
|
470
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
471
|
+
times3 = cuda.declare_device("times3", "int32(int32)", link=times3_cu)
|
465
472
|
|
466
473
|
@cuda.jit
|
467
474
|
def kernel(r, x):
|
@@ -477,5 +484,5 @@ class TestDeclareDevice(CUDATestCase):
|
|
477
484
|
np.testing.assert_equal(r, x * 5)
|
478
485
|
|
479
486
|
|
480
|
-
if __name__ ==
|
487
|
+
if __name__ == "__main__":
|
481
488
|
unittest.main()
|
@@ -15,19 +15,18 @@ def add_kernel(r, x, y):
|
|
15
15
|
r[0] = x + y
|
16
16
|
|
17
17
|
|
18
|
-
@skip_on_cudasim(
|
18
|
+
@skip_on_cudasim("Specialization not implemented in the simulator")
|
19
19
|
class TestDispatcherSpecialization(CUDATestCase):
|
20
20
|
def _test_no_double_specialize(self, dispatcher, ty):
|
21
|
-
|
22
21
|
with self.assertRaises(RuntimeError) as e:
|
23
22
|
dispatcher.specialize(ty)
|
24
23
|
|
25
|
-
self.assertIn(
|
24
|
+
self.assertIn("Dispatcher already specialized", str(e.exception))
|
26
25
|
|
27
26
|
def test_no_double_specialize_sig_same_types(self):
|
28
27
|
# Attempting to specialize a kernel jitted with a signature is illegal,
|
29
28
|
# even for the same types the kernel is already specialized for.
|
30
|
-
@cuda.jit(
|
29
|
+
@cuda.jit("void(float32[::1])")
|
31
30
|
def f(x):
|
32
31
|
pass
|
33
32
|
|
@@ -45,7 +44,7 @@ class TestDispatcherSpecialization(CUDATestCase):
|
|
45
44
|
|
46
45
|
def test_no_double_specialize_sig_diff_types(self):
|
47
46
|
# Attempting to specialize a kernel jitted with a signature is illegal.
|
48
|
-
@cuda.jit(
|
47
|
+
@cuda.jit("void(int32[::1])")
|
49
48
|
def f(x):
|
50
49
|
pass
|
51
50
|
|
@@ -132,13 +131,13 @@ class TestDispatcher(CUDATestCase):
|
|
132
131
|
self.assertEqual(r[0], add(12300000000, 456))
|
133
132
|
|
134
133
|
# Now force compilation of only a single specialization
|
135
|
-
c_add = cuda.jit(
|
134
|
+
c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
|
136
135
|
r = np.zeros(1, dtype=np.int32)
|
137
136
|
|
138
137
|
c_add[1, 1](r, 123, 456)
|
139
138
|
self.assertPreciseEqual(r[0], add(123, 456))
|
140
139
|
|
141
|
-
@skip_on_cudasim(
|
140
|
+
@skip_on_cudasim("Simulator ignores signature")
|
142
141
|
@unittest.expectedFailure
|
143
142
|
def test_coerce_input_types_unsafe(self):
|
144
143
|
# Implicit (unsafe) conversion of float to int, originally from
|
@@ -149,25 +148,24 @@ class TestDispatcher(CUDATestCase):
|
|
149
148
|
#
|
150
149
|
# This test is marked as xfail until future changes enable this
|
151
150
|
# behavior.
|
152
|
-
c_add = cuda.jit(
|
151
|
+
c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
|
153
152
|
r = np.zeros(1, dtype=np.int32)
|
154
153
|
|
155
154
|
c_add[1, 1](r, 12.3, 45.6)
|
156
155
|
self.assertPreciseEqual(r[0], add(12, 45))
|
157
156
|
|
158
|
-
@skip_on_cudasim(
|
157
|
+
@skip_on_cudasim("Simulator ignores signature")
|
159
158
|
def test_coerce_input_types_unsafe_complex(self):
|
160
159
|
# Implicit conversion of complex to int disallowed
|
161
|
-
c_add = cuda.jit(
|
160
|
+
c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
|
162
161
|
r = np.zeros(1, dtype=np.int32)
|
163
162
|
|
164
163
|
with self.assertRaises(TypeError):
|
165
164
|
c_add[1, 1](r, 12.3, 45.6j)
|
166
165
|
|
167
|
-
@skip_on_cudasim(
|
166
|
+
@skip_on_cudasim("Simulator does not track overloads")
|
168
167
|
def test_ambiguous_new_version(self):
|
169
|
-
"""Test compiling new version in an ambiguous case
|
170
|
-
"""
|
168
|
+
"""Test compiling new version in an ambiguous case"""
|
171
169
|
c_add = cuda.jit(add_kernel)
|
172
170
|
|
173
171
|
r = np.zeros(1, dtype=np.float64)
|
@@ -190,8 +188,9 @@ class TestDispatcher(CUDATestCase):
|
|
190
188
|
# to (float, int) or (int, float) with equal weight.
|
191
189
|
c_add[1, 1](r, 1, 1)
|
192
190
|
self.assertAlmostEqual(r[0], INT + INT)
|
193
|
-
self.assertEqual(
|
194
|
-
|
191
|
+
self.assertEqual(
|
192
|
+
len(c_add.overloads), 4, "didn't compile a new version"
|
193
|
+
)
|
195
194
|
|
196
195
|
@skip_on_cudasim("Simulator doesn't support concurrent kernels")
|
197
196
|
def test_lock(self):
|
@@ -245,8 +244,10 @@ class TestDispatcher(CUDATestCase):
|
|
245
244
|
|
246
245
|
def test_explicit_signatures_strings(self):
|
247
246
|
# Check with a list of strings for signatures
|
248
|
-
sigs = [
|
249
|
-
|
247
|
+
sigs = [
|
248
|
+
"(int64[::1], int64, int64)",
|
249
|
+
"(float64[::1], float64, float64)",
|
250
|
+
]
|
250
251
|
self._test_explicit_signatures(sigs)
|
251
252
|
|
252
253
|
def test_explicit_signatures_tuples(self):
|
@@ -256,26 +257,31 @@ class TestDispatcher(CUDATestCase):
|
|
256
257
|
|
257
258
|
def test_explicit_signatures_signatures(self):
|
258
259
|
# Check with a list of Signature objects for signatures
|
259
|
-
sigs = [
|
260
|
-
|
260
|
+
sigs = [
|
261
|
+
void(int64[::1], int64, int64),
|
262
|
+
void(float64[::1], float64, float64),
|
263
|
+
]
|
261
264
|
self._test_explicit_signatures(sigs)
|
262
265
|
|
263
266
|
def test_explicit_signatures_mixed(self):
|
264
267
|
# Check when we mix types of signature objects in a list of signatures
|
265
268
|
|
266
269
|
# Tuple and string
|
267
|
-
sigs = [(int64[::1], int64, int64),
|
268
|
-
"(float64[::1], float64, float64)"]
|
270
|
+
sigs = [(int64[::1], int64, int64), "(float64[::1], float64, float64)"]
|
269
271
|
self._test_explicit_signatures(sigs)
|
270
272
|
|
271
273
|
# Tuple and Signature object
|
272
|
-
sigs = [
|
273
|
-
|
274
|
+
sigs = [
|
275
|
+
(int64[::1], int64, int64),
|
276
|
+
void(float64[::1], float64, float64),
|
277
|
+
]
|
274
278
|
self._test_explicit_signatures(sigs)
|
275
279
|
|
276
280
|
# Signature object and string
|
277
|
-
sigs = [
|
278
|
-
|
281
|
+
sigs = [
|
282
|
+
void(int64[::1], int64, int64),
|
283
|
+
"(float64[::1], float64, float64)",
|
284
|
+
]
|
279
285
|
self._test_explicit_signatures(sigs)
|
280
286
|
|
281
287
|
def test_explicit_signatures_same_type_class(self):
|
@@ -284,8 +290,10 @@ class TestDispatcher(CUDATestCase):
|
|
284
290
|
# that dispatch is differentiated on the types of x and y only, to
|
285
291
|
# closely preserve the intent of the original test from
|
286
292
|
# numba.tests.test_dispatcher)
|
287
|
-
sigs = [
|
288
|
-
|
293
|
+
sigs = [
|
294
|
+
"(float64[::1], float32, float32)",
|
295
|
+
"(float64[::1], float64, float64)",
|
296
|
+
]
|
289
297
|
f = cuda.jit(sigs)(add_kernel)
|
290
298
|
|
291
299
|
r = np.zeros(1, dtype=np.float64)
|
@@ -296,13 +304,17 @@ class TestDispatcher(CUDATestCase):
|
|
296
304
|
f[1, 1](r, 1, 2**-25)
|
297
305
|
self.assertPreciseEqual(r[0], 1.0000000298023224)
|
298
306
|
|
299
|
-
@skip_on_cudasim(
|
307
|
+
@skip_on_cudasim("No overload resolution in the simulator")
|
300
308
|
def test_explicit_signatures_ambiguous_resolution(self):
|
301
309
|
# Fail to resolve ambiguity between the two best overloads
|
302
310
|
# (Also deliberate float64[::1] for the first argument in all cases)
|
303
|
-
f = cuda.jit(
|
304
|
-
|
305
|
-
|
311
|
+
f = cuda.jit(
|
312
|
+
[
|
313
|
+
"(float64[::1], float32, float64)",
|
314
|
+
"(float64[::1], float64, float32)",
|
315
|
+
"(float64[::1], int64, int64)",
|
316
|
+
]
|
317
|
+
)(add_kernel)
|
306
318
|
with self.assertRaises(TypeError) as cm:
|
307
319
|
r = np.zeros(1, dtype=np.float64)
|
308
320
|
f[1, 1](r, 1.0, 2.0)
|
@@ -317,12 +329,12 @@ class TestDispatcher(CUDATestCase):
|
|
317
329
|
r"\(Array\(float64, 1, 'C', False, aligned=True\), float32,"
|
318
330
|
r" float64\) -> none\n"
|
319
331
|
r"\(Array\(float64, 1, 'C', False, aligned=True\), float64,"
|
320
|
-
r" float32\) -> none"
|
332
|
+
r" float32\) -> none",
|
321
333
|
)
|
322
334
|
# The integer signature is not part of the best matches
|
323
335
|
self.assertNotIn("int64", str(cm.exception))
|
324
336
|
|
325
|
-
@skip_on_cudasim(
|
337
|
+
@skip_on_cudasim("Simulator does not use _prepare_args")
|
326
338
|
@unittest.expectedFailure
|
327
339
|
def test_explicit_signatures_unsafe(self):
|
328
340
|
# These tests are from test_explicit_signatures, but have to be xfail
|
@@ -336,8 +348,10 @@ class TestDispatcher(CUDATestCase):
|
|
336
348
|
self.assertPreciseEqual(r[0], 3)
|
337
349
|
self.assertEqual(len(f.overloads), 1, f.overloads)
|
338
350
|
|
339
|
-
sigs = [
|
340
|
-
|
351
|
+
sigs = [
|
352
|
+
"(int64[::1], int64, int64)",
|
353
|
+
"(float64[::1], float64, float64)",
|
354
|
+
]
|
341
355
|
f = cuda.jit(sigs)(add_kernel)
|
342
356
|
r = np.zeros(1, dtype=np.float64)
|
343
357
|
# Approximate match (int32 -> float64 is a safe conversion)
|
@@ -414,7 +428,7 @@ class TestDispatcher(CUDATestCase):
|
|
414
428
|
f[1, 1](r, 1.5, 2.5)
|
415
429
|
self.assertPreciseEqual(r[0], 4.0)
|
416
430
|
|
417
|
-
@skip_on_cudasim(
|
431
|
+
@skip_on_cudasim("CUDA Simulator does not force casting")
|
418
432
|
def test_explicit_signatures_device_unsafe(self):
|
419
433
|
# These tests are from test_explicit_signatures. The device function
|
420
434
|
# variant of these tests can succeed on CUDA because the compilation
|
@@ -489,17 +503,15 @@ class TestDispatcherKernelProperties(CUDATestCase):
|
|
489
503
|
# provides the same values as getting the registers per thread for
|
490
504
|
# individual signatures.
|
491
505
|
regs_per_thread_all = pi_sin_array.get_regs_per_thread()
|
492
|
-
self.assertEqual(regs_per_thread_all[sig_f32.args],
|
493
|
-
|
494
|
-
self.assertEqual(regs_per_thread_all[sig_f64.args],
|
495
|
-
regs_per_thread_f64)
|
506
|
+
self.assertEqual(regs_per_thread_all[sig_f32.args], regs_per_thread_f32)
|
507
|
+
self.assertEqual(regs_per_thread_all[sig_f64.args], regs_per_thread_f64)
|
496
508
|
|
497
509
|
if regs_per_thread_f32 == regs_per_thread_f64:
|
498
510
|
# If the register usage is the same for both variants, there may be
|
499
511
|
# a bug, but this may also be an artifact of the compiler / driver
|
500
512
|
# / device combination, so produce an informational message only.
|
501
|
-
print(
|
502
|
-
print(
|
513
|
+
print("f32 and f64 variant thread usages are equal.")
|
514
|
+
print("This may warrant some investigation. Devices:")
|
503
515
|
cuda.detect()
|
504
516
|
|
505
517
|
def test_get_regs_per_thread_specialized(self):
|
@@ -696,5 +708,5 @@ class TestDispatcherKernelProperties(CUDATestCase):
|
|
696
708
|
self.assertGreaterEqual(local_mem_per_thread, N * 4)
|
697
709
|
|
698
710
|
|
699
|
-
if __name__ ==
|
711
|
+
if __name__ == "__main__":
|
700
712
|
unittest.main()
|