numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
from numba.tests.support import
|
1
|
+
from numba.tests.support import override_config, captured_stdout
|
2
2
|
from numba.cuda.testing import skip_on_cudasim
|
3
3
|
from numba import cuda
|
4
4
|
from numba.core import types
|
@@ -8,7 +8,7 @@ import re
|
|
8
8
|
import unittest
|
9
9
|
|
10
10
|
|
11
|
-
@skip_on_cudasim(
|
11
|
+
@skip_on_cudasim("Simulator does not produce debug dumps")
|
12
12
|
class TestCudaDebugInfo(CUDATestCase):
|
13
13
|
"""
|
14
14
|
These tests only checks the compiled PTX for debuginfo section
|
@@ -49,7 +49,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
49
49
|
self._check(foo, sig=(types.int32[:],), expect=True)
|
50
50
|
|
51
51
|
def test_environment_override(self):
|
52
|
-
with override_config(
|
52
|
+
with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
|
53
53
|
# Using default value
|
54
54
|
@cuda.jit(opt=False)
|
55
55
|
def foo(x):
|
@@ -86,7 +86,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
86
86
|
|
87
87
|
llvm_ir = f.inspect_llvm(sig)
|
88
88
|
# A varible name starting with "bool" in the debug metadata
|
89
|
-
pat = r
|
89
|
+
pat = r"!DILocalVariable\(.*name:\s+\"bool"
|
90
90
|
match = re.compile(pat).search(llvm_ir)
|
91
91
|
self.assertIsNone(match, msg=llvm_ir)
|
92
92
|
|
@@ -106,7 +106,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
106
106
|
mdnode_id = match.group(1)
|
107
107
|
|
108
108
|
# verify the DIBasicType has correct encoding attribute DW_ATE_boolean
|
109
|
-
pat = rf
|
109
|
+
pat = rf"!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean"
|
110
110
|
match = re.compile(pat).search(llvm_ir)
|
111
111
|
self.assertIsNotNone(match, msg=llvm_ir)
|
112
112
|
|
@@ -133,14 +133,17 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
133
133
|
|
134
134
|
llvm_ir = f.inspect_llvm(sig)
|
135
135
|
|
136
|
-
defines = [
|
137
|
-
|
136
|
+
defines = [
|
137
|
+
line
|
138
|
+
for line in llvm_ir.splitlines()
|
139
|
+
if 'define void @"_ZN6cudapy' in line
|
140
|
+
]
|
138
141
|
|
139
142
|
# Make sure we only found one definition
|
140
143
|
self.assertEqual(len(defines), 1)
|
141
144
|
|
142
145
|
wrapper_define = defines[0]
|
143
|
-
self.assertIn(
|
146
|
+
self.assertIn("!dbg", wrapper_define)
|
144
147
|
|
145
148
|
def test_debug_function_calls_internal_impl(self):
|
146
149
|
# Calling a function in a module generated from an implementation
|
@@ -198,16 +201,16 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
198
201
|
debug_opts = itertools.product(*[(True, False)] * 3)
|
199
202
|
|
200
203
|
for kernel_debug, f1_debug, f2_debug in debug_opts:
|
201
|
-
with self.subTest(
|
202
|
-
|
203
|
-
|
204
|
-
self._test_chained_device_function(
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
def _test_chained_device_function_two_calls(
|
209
|
-
|
210
|
-
|
204
|
+
with self.subTest(
|
205
|
+
kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
|
206
|
+
):
|
207
|
+
self._test_chained_device_function(
|
208
|
+
kernel_debug, f1_debug, f2_debug
|
209
|
+
)
|
210
|
+
|
211
|
+
def _test_chained_device_function_two_calls(
|
212
|
+
self, kernel_debug, f1_debug, f2_debug
|
213
|
+
):
|
211
214
|
@cuda.jit(device=True, debug=f2_debug, opt=False)
|
212
215
|
def f2(x):
|
213
216
|
return x + 1
|
@@ -232,12 +235,12 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
232
235
|
debug_opts = itertools.product(*[(True, False)] * 3)
|
233
236
|
|
234
237
|
for kernel_debug, f1_debug, f2_debug in debug_opts:
|
235
|
-
with self.subTest(
|
236
|
-
|
237
|
-
|
238
|
-
self._test_chained_device_function_two_calls(
|
239
|
-
|
240
|
-
|
238
|
+
with self.subTest(
|
239
|
+
kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
|
240
|
+
):
|
241
|
+
self._test_chained_device_function_two_calls(
|
242
|
+
kernel_debug, f1_debug, f2_debug
|
243
|
+
)
|
241
244
|
|
242
245
|
def test_chained_device_three_functions(self):
|
243
246
|
# Like test_chained_device_function, but with enough functions (three)
|
@@ -278,13 +281,13 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
278
281
|
llvm_ir = f.inspect_llvm(sig)
|
279
282
|
|
280
283
|
# extract the metadata node id from `types` field of DISubroutineType
|
281
|
-
pat = r
|
284
|
+
pat = r"!DISubroutineType\(types:\s+!(\d+)\)"
|
282
285
|
match = re.compile(pat).search(llvm_ir)
|
283
286
|
self.assertIsNotNone(match, msg=llvm_ir)
|
284
287
|
mdnode_id = match.group(1)
|
285
288
|
|
286
289
|
# extract the metadata node ids from the flexible node of types
|
287
|
-
pat = rf
|
290
|
+
pat = rf"!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}"
|
288
291
|
match = re.compile(pat).search(llvm_ir)
|
289
292
|
self.assertIsNotNone(match, msg=llvm_ir)
|
290
293
|
mdnode_id1 = match.group(1)
|
@@ -303,10 +306,56 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
303
306
|
|
304
307
|
def test_kernel_args_types_dump(self):
|
305
308
|
# see issue#135
|
306
|
-
with override_config(
|
309
|
+
with override_config("DUMP_LLVM", 1):
|
307
310
|
with captured_stdout():
|
308
311
|
self._test_kernel_args_types()
|
309
312
|
|
313
|
+
def test_llvm_dbg_value(self):
|
314
|
+
sig = (types.int32, types.int32)
|
315
|
+
|
316
|
+
@cuda.jit("void(int32, int32)", debug=True, opt=False)
|
317
|
+
def f(x, y):
|
318
|
+
z = x # noqa: F841
|
319
|
+
z = 100 # noqa: F841
|
320
|
+
z = y # noqa: F841
|
321
|
+
z = True # noqa: F841
|
322
|
+
|
323
|
+
llvm_ir = f.inspect_llvm(sig)
|
324
|
+
# Verify the call to llvm.dbg.declare is replaced by llvm.dbg.value
|
325
|
+
pat1 = r'call void @"llvm.dbg.declare"'
|
326
|
+
match = re.compile(pat1).search(llvm_ir)
|
327
|
+
self.assertIsNone(match, msg=llvm_ir)
|
328
|
+
pat2 = r'call void @"llvm.dbg.value"'
|
329
|
+
match = re.compile(pat2).search(llvm_ir)
|
330
|
+
self.assertIsNotNone(match, msg=llvm_ir)
|
331
|
+
|
332
|
+
def test_no_user_var_alias(self):
|
333
|
+
sig = (types.int32, types.int32)
|
334
|
+
|
335
|
+
@cuda.jit("void(int32, int32)", debug=True, opt=False)
|
336
|
+
def f(x, y):
|
337
|
+
z = x # noqa: F841
|
338
|
+
z = y # noqa: F841
|
339
|
+
|
340
|
+
llvm_ir = f.inspect_llvm(sig)
|
341
|
+
pat = r'!DILocalVariable.*name:\s+"z\$1".*'
|
342
|
+
match = re.compile(pat).search(llvm_ir)
|
343
|
+
self.assertIsNone(match, msg=llvm_ir)
|
344
|
+
|
345
|
+
def test_no_literal_type(self):
|
346
|
+
sig = (types.int32,)
|
347
|
+
|
348
|
+
@cuda.jit("void(int32)", debug=True, opt=False)
|
349
|
+
def f(x):
|
350
|
+
z = x # noqa: F841
|
351
|
+
z = 100 # noqa: F841
|
352
|
+
z = True # noqa: F841
|
353
|
+
|
354
|
+
llvm_ir = f.inspect_llvm(sig)
|
355
|
+
pat = r'!DIBasicType.*name:\s+"Literal.*'
|
356
|
+
match = re.compile(pat).search(llvm_ir)
|
357
|
+
self.assertIsNone(match, msg=llvm_ir)
|
358
|
+
|
310
359
|
|
311
|
-
if __name__ ==
|
360
|
+
if __name__ == "__main__":
|
312
361
|
unittest.main()
|
@@ -3,8 +3,13 @@ import cffi
|
|
3
3
|
|
4
4
|
import numpy as np
|
5
5
|
|
6
|
-
from numba.cuda.testing import (
|
7
|
-
|
6
|
+
from numba.cuda.testing import (
|
7
|
+
skip_if_curand_kernel_missing,
|
8
|
+
skip_on_cudasim,
|
9
|
+
test_data_dir,
|
10
|
+
unittest,
|
11
|
+
CUDATestCase,
|
12
|
+
)
|
8
13
|
from numba import cuda, jit, float32, int32, types
|
9
14
|
from numba.core.errors import TypingError
|
10
15
|
from numba.tests.support import skip_unless_cffi
|
@@ -12,9 +17,7 @@ from types import ModuleType
|
|
12
17
|
|
13
18
|
|
14
19
|
class TestDeviceFunc(CUDATestCase):
|
15
|
-
|
16
20
|
def test_use_add2f(self):
|
17
|
-
|
18
21
|
@cuda.jit("float32(float32, float32)", device=True)
|
19
22
|
def add2f(a, b):
|
20
23
|
return a + b
|
@@ -33,7 +36,6 @@ class TestDeviceFunc(CUDATestCase):
|
|
33
36
|
self.assertTrue(np.all(ary == exp), (ary, exp))
|
34
37
|
|
35
38
|
def test_indirect_add2f(self):
|
36
|
-
|
37
39
|
@cuda.jit("float32(float32, float32)", device=True)
|
38
40
|
def add2f(a, b):
|
39
41
|
return a + b
|
@@ -74,12 +76,12 @@ class TestDeviceFunc(CUDATestCase):
|
|
74
76
|
|
75
77
|
self._check_cpu_dispatcher(add)
|
76
78
|
|
77
|
-
@skip_on_cudasim(
|
79
|
+
@skip_on_cudasim("not supported in cudasim")
|
78
80
|
def test_cpu_dispatcher_invalid(self):
|
79
81
|
# Test invalid usage
|
80
82
|
# Explicit signature disables compilation, which also disable
|
81
83
|
# compiling on CUDA.
|
82
|
-
@jit(
|
84
|
+
@jit("(i4, i4)")
|
83
85
|
def add(a, b):
|
84
86
|
return a + b
|
85
87
|
|
@@ -95,7 +97,7 @@ class TestDeviceFunc(CUDATestCase):
|
|
95
97
|
def add(a, b):
|
96
98
|
return a + b
|
97
99
|
|
98
|
-
mymod = ModuleType(name=
|
100
|
+
mymod = ModuleType(name="mymod")
|
99
101
|
mymod.add = add
|
100
102
|
del add
|
101
103
|
|
@@ -109,7 +111,7 @@ class TestDeviceFunc(CUDATestCase):
|
|
109
111
|
add_kernel[1, ary.size](ary)
|
110
112
|
np.testing.assert_equal(expect, ary)
|
111
113
|
|
112
|
-
@skip_on_cudasim(
|
114
|
+
@skip_on_cudasim("not supported in cudasim")
|
113
115
|
def test_inspect_llvm(self):
|
114
116
|
@cuda.jit(device=True)
|
115
117
|
def foo(x, y):
|
@@ -120,13 +122,13 @@ class TestDeviceFunc(CUDATestCase):
|
|
120
122
|
|
121
123
|
fname = cres.fndesc.mangled_name
|
122
124
|
# Verify that the function name has "foo" in it as in the python name
|
123
|
-
self.assertIn(
|
125
|
+
self.assertIn("foo", fname)
|
124
126
|
|
125
127
|
llvm = foo.inspect_llvm(args)
|
126
128
|
# Check that the compiled function name is in the LLVM.
|
127
129
|
self.assertIn(fname, llvm)
|
128
130
|
|
129
|
-
@skip_on_cudasim(
|
131
|
+
@skip_on_cudasim("not supported in cudasim")
|
130
132
|
def test_inspect_asm(self):
|
131
133
|
@cuda.jit(device=True)
|
132
134
|
def foo(x, y):
|
@@ -137,13 +139,13 @@ class TestDeviceFunc(CUDATestCase):
|
|
137
139
|
|
138
140
|
fname = cres.fndesc.mangled_name
|
139
141
|
# Verify that the function name has "foo" in it as in the python name
|
140
|
-
self.assertIn(
|
142
|
+
self.assertIn("foo", fname)
|
141
143
|
|
142
144
|
ptx = foo.inspect_asm(args)
|
143
145
|
# Check that the compiled function name is in the PTX
|
144
146
|
self.assertIn(fname, ptx)
|
145
147
|
|
146
|
-
@skip_on_cudasim(
|
148
|
+
@skip_on_cudasim("not supported in cudasim")
|
147
149
|
def test_inspect_sass_disallowed(self):
|
148
150
|
@cuda.jit(device=True)
|
149
151
|
def foo(x, y):
|
@@ -152,10 +154,11 @@ class TestDeviceFunc(CUDATestCase):
|
|
152
154
|
with self.assertRaises(RuntimeError) as raises:
|
153
155
|
foo.inspect_sass((int32, int32))
|
154
156
|
|
155
|
-
self.assertIn(
|
156
|
-
|
157
|
+
self.assertIn(
|
158
|
+
"Cannot inspect SASS of a device function", str(raises.exception)
|
159
|
+
)
|
157
160
|
|
158
|
-
@skip_on_cudasim(
|
161
|
+
@skip_on_cudasim("cudasim will allow calling any function")
|
159
162
|
def test_device_func_as_kernel_disallowed(self):
|
160
163
|
@cuda.jit(device=True)
|
161
164
|
def f():
|
@@ -164,10 +167,12 @@ class TestDeviceFunc(CUDATestCase):
|
|
164
167
|
with self.assertRaises(RuntimeError) as raises:
|
165
168
|
f[1, 1]()
|
166
169
|
|
167
|
-
self.assertIn(
|
168
|
-
|
170
|
+
self.assertIn(
|
171
|
+
"Cannot compile a device function as a kernel",
|
172
|
+
str(raises.exception),
|
173
|
+
)
|
169
174
|
|
170
|
-
@skip_on_cudasim(
|
175
|
+
@skip_on_cudasim("cudasim ignores casting by jit decorator signature")
|
171
176
|
def test_device_casting(self):
|
172
177
|
# Ensure that casts to the correct type are forced when calling a
|
173
178
|
# device function with a signature. This test ensures that:
|
@@ -176,20 +181,23 @@ class TestDeviceFunc(CUDATestCase):
|
|
176
181
|
# shouldn't
|
177
182
|
# - We insert a cast when calling rgba, as opposed to failing to type.
|
178
183
|
|
179
|
-
@cuda.jit(
|
184
|
+
@cuda.jit("int32(int32, int32, int32, int32)", device=True)
|
180
185
|
def rgba(r, g, b, a):
|
181
|
-
return (
|
182
|
-
|
183
|
-
|
184
|
-
|
186
|
+
return (
|
187
|
+
((r & 0xFF) << 16)
|
188
|
+
| ((g & 0xFF) << 8)
|
189
|
+
| ((b & 0xFF) << 0)
|
190
|
+
| ((a & 0xFF) << 24)
|
191
|
+
)
|
185
192
|
|
186
193
|
@cuda.jit
|
187
194
|
def rgba_caller(x, channels):
|
188
195
|
x[0] = rgba(channels[0], channels[1], channels[2], channels[3])
|
189
196
|
|
190
197
|
x = cuda.device_array(1, dtype=np.int32)
|
191
|
-
channels = cuda.to_device(
|
192
|
-
|
198
|
+
channels = cuda.to_device(
|
199
|
+
np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
|
200
|
+
)
|
193
201
|
|
194
202
|
rgba_caller[1, 1](x, channels)
|
195
203
|
|
@@ -259,32 +267,31 @@ int random_number(unsigned int *out, unsigned long long seed)
|
|
259
267
|
}""")
|
260
268
|
|
261
269
|
|
262
|
-
@skip_on_cudasim(
|
270
|
+
@skip_on_cudasim("External functions unsupported in the simulator")
|
263
271
|
class TestDeclareDevice(CUDATestCase):
|
264
|
-
|
265
272
|
def check_api(self, decl):
|
266
|
-
self.assertEqual(decl.name,
|
273
|
+
self.assertEqual(decl.name, "f1")
|
267
274
|
self.assertEqual(decl.sig.args, (float32[:],))
|
268
275
|
self.assertEqual(decl.sig.return_type, int32)
|
269
276
|
|
270
277
|
def test_declare_device_signature(self):
|
271
|
-
f1 = cuda.declare_device(
|
278
|
+
f1 = cuda.declare_device("f1", int32(float32[:]))
|
272
279
|
self.check_api(f1)
|
273
280
|
|
274
281
|
def test_declare_device_string(self):
|
275
|
-
f1 = cuda.declare_device(
|
282
|
+
f1 = cuda.declare_device("f1", "int32(float32[:])")
|
276
283
|
self.check_api(f1)
|
277
284
|
|
278
285
|
def test_bad_declare_device_tuple(self):
|
279
|
-
with self.assertRaisesRegex(TypeError,
|
280
|
-
cuda.declare_device(
|
286
|
+
with self.assertRaisesRegex(TypeError, "Return type"):
|
287
|
+
cuda.declare_device("f1", (float32[:],))
|
281
288
|
|
282
289
|
def test_bad_declare_device_string(self):
|
283
|
-
with self.assertRaisesRegex(TypeError,
|
284
|
-
cuda.declare_device(
|
290
|
+
with self.assertRaisesRegex(TypeError, "Return type"):
|
291
|
+
cuda.declare_device("f1", "(float32[:],)")
|
285
292
|
|
286
293
|
def test_link_cu_source(self):
|
287
|
-
times2 = cuda.declare_device(
|
294
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
288
295
|
|
289
296
|
@cuda.jit
|
290
297
|
def kernel(r, x):
|
@@ -301,7 +308,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
301
308
|
|
302
309
|
def _test_link_multiple_sources(self, link_type):
|
303
310
|
link = link_type([times2_cu, times4_cu])
|
304
|
-
times4 = cuda.declare_device(
|
311
|
+
times4 = cuda.declare_device("times4", "int32(int32)", link=link)
|
305
312
|
|
306
313
|
@cuda.jit
|
307
314
|
def kernel(r, x):
|
@@ -360,7 +367,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
360
367
|
np.testing.assert_equal(x[0], 323845807)
|
361
368
|
|
362
369
|
def test_declared_in_called_function(self):
|
363
|
-
times2 = cuda.declare_device(
|
370
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
364
371
|
|
365
372
|
@cuda.jit
|
366
373
|
def device_func(x):
|
@@ -380,7 +387,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
380
387
|
np.testing.assert_equal(r, x * 2)
|
381
388
|
|
382
389
|
def test_declared_in_called_function_twice(self):
|
383
|
-
times2 = cuda.declare_device(
|
390
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
384
391
|
|
385
392
|
@cuda.jit
|
386
393
|
def device_func_1(x):
|
@@ -404,7 +411,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
404
411
|
np.testing.assert_equal(r, x * 2)
|
405
412
|
|
406
413
|
def test_declared_in_called_function_two_calls(self):
|
407
|
-
times2 = cuda.declare_device(
|
414
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
408
415
|
|
409
416
|
@cuda.jit
|
410
417
|
def device_func(x):
|
@@ -424,7 +431,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
424
431
|
np.testing.assert_equal(r, x * 6)
|
425
432
|
|
426
433
|
def test_call_declared_function_twice(self):
|
427
|
-
times2 = cuda.declare_device(
|
434
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
428
435
|
|
429
436
|
@cuda.jit
|
430
437
|
def kernel(r, x):
|
@@ -440,7 +447,7 @@ class TestDeclareDevice(CUDATestCase):
|
|
440
447
|
np.testing.assert_equal(r, x * 6)
|
441
448
|
|
442
449
|
def test_declared_in_called_function_and_parent(self):
|
443
|
-
times2 = cuda.declare_device(
|
450
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
444
451
|
|
445
452
|
@cuda.jit
|
446
453
|
def device_func(x):
|
@@ -460,8 +467,8 @@ class TestDeclareDevice(CUDATestCase):
|
|
460
467
|
np.testing.assert_equal(r, x * 4)
|
461
468
|
|
462
469
|
def test_call_two_different_declared_functions(self):
|
463
|
-
times2 = cuda.declare_device(
|
464
|
-
times3 = cuda.declare_device(
|
470
|
+
times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
|
471
|
+
times3 = cuda.declare_device("times3", "int32(int32)", link=times3_cu)
|
465
472
|
|
466
473
|
@cuda.jit
|
467
474
|
def kernel(r, x):
|
@@ -477,5 +484,5 @@ class TestDeclareDevice(CUDATestCase):
|
|
477
484
|
np.testing.assert_equal(r, x * 5)
|
478
485
|
|
479
486
|
|
480
|
-
if __name__ ==
|
487
|
+
if __name__ == "__main__":
|
481
488
|
unittest.main()
|