numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda, float32, float64, int32, void
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
|
5
|
+
|
6
|
+
class TestCudaIDiv(CUDATestCase):
    """In-place true division (``/=``) on array elements inside CUDA kernels.

    Each test halves a 2x2 array of ones on the device and checks that every
    element comes back as 0.5, once for float32 and once for float64.
    """

    def test_inplace_div(self):
        # float32 variant.

        @cuda.jit(void(float32[:, :], int32, int32))
        def div(arr, n_rows, n_cols):
            # The in-place operator is the construct under test, so it must
            # not be rewritten as ``arr[i, j] = arr[i, j] / 2.0``.
            for i in range(n_rows):
                for j in range(n_cols):
                    arr[i, j] /= 2.0

        host_ones = np.ones((2, 2), dtype=np.float32)
        dev_arr = cuda.to_device(host_ones)
        # Launched as [1, 1]; the kernel itself loops over the whole array.
        div[1, 1](dev_arr, 2, 2)
        halved = dev_arr.copy_to_host()
        self.assertTrue((halved == 0.5).all())

    def test_inplace_div_double(self):
        # float64 variant of the test above.

        @cuda.jit(void(float64[:, :], int32, int32))
        def div_double(arr, n_rows, n_cols):
            for i in range(n_rows):
                for j in range(n_cols):
                    arr[i, j] /= 2.0

        host_ones = np.ones((2, 2), dtype=np.float64)
        dev_arr = cuda.to_device(host_ones)
        div_double[1, 1](dev_arr, 2, 2)
        halved = dev_arr.copy_to_host()
        self.assertTrue((halved == 0.5).all())
|
34
|
+
|
35
|
+
|
36
|
+
if __name__ == '__main__':
    # Allow this test module to be run directly as a script.
    unittest.main()
|
@@ -0,0 +1,165 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from io import StringIO
|
4
|
+
from numba import cuda, float32, float64, int32, intp
|
5
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
6
|
+
from numba.cuda.testing import (skip_on_cudasim, skip_with_nvdisasm,
|
7
|
+
skip_without_nvdisasm)
|
8
|
+
|
9
|
+
|
10
|
+
@skip_on_cudasim('Simulator does not generate code to be inspected')
class TestInspect(CUDATestCase):
    """Tests for the inspection methods of CUDA-jitted kernels.

    Exercises ``inspect_types``, ``inspect_llvm``, ``inspect_asm``,
    ``inspect_sass`` and ``inspect_sass_cfg``, both on kernels compiled with
    an explicit signature and on kernels compiled when first launched.
    """

    @property
    def cc(self):
        # Compute capability of the device behind the current CUDA context.
        return cuda.current_context().device.compute_capability

    def test_monotyped(self):
        # Kernel compiled for exactly one, explicitly given, signature.
        sig = (float32, int32)

        @cuda.jit(sig)
        def foo(x, y):
            pass

        # The context manager closes the buffer even if inspect_types()
        # raises.
        with StringIO() as buf:
            foo.inspect_types(file=buf)
            typeanno = buf.getvalue()

        # Function name in annotation
        self.assertIn("foo", typeanno)
        # Signature in annotation
        self.assertIn("(float32, int32)", typeanno)

        # Function name in LLVM
        llvm = foo.inspect_llvm(sig)
        self.assertIn("foo", llvm)

        # Kernel in LLVM
        self.assertIn('cuda.kernel.wrapper', llvm)

        # Wrapped device function body in LLVM
        self.assertIn("define linkonce_odr i32", llvm)

        asm = foo.inspect_asm(sig)

        # Function name in PTX
        self.assertIn("foo", asm)
        # NVVM inserted comments in PTX
        self.assertIn("Generated by NVIDIA NVVM Compiler", asm)

    def test_polytyped(self):
        # Kernel without a declared signature; the two launches below are
        # expected to yield one specialization each.
        @cuda.jit
        def foo(x, y):
            pass

        foo[1, 1](1, 1)
        foo[1, 1](1.2, 2.4)

        with StringIO() as buf:
            foo.inspect_types(file=buf)
            typeanno = buf.getvalue()

        # Signature in annotation
        self.assertIn("({0}, {0})".format(intp), typeanno)
        self.assertIn("(float64, float64)", typeanno)

        expected_sigs = ((intp, intp), (float64, float64))

        # Signature in LLVM dict
        llvmirs = foo.inspect_llvm()
        self.assertEqual(2, len(llvmirs))
        for sig in expected_sigs:
            self.assertIn(sig, llvmirs)

        for sig in expected_sigs:
            # Function name in LLVM
            self.assertIn("foo", llvmirs[sig])
            # Kernel in LLVM
            self.assertIn('cuda.kernel.wrapper', llvmirs[sig])
            # Wrapped device function body in LLVM
            self.assertIn("define linkonce_odr i32", llvmirs[sig])

        asmdict = foo.inspect_asm()

        # Signature in assembly dict
        self.assertEqual(2, len(asmdict))
        for sig in expected_sigs:
            self.assertIn(sig, asmdict)

        # Function name in PTX
        for sig in expected_sigs:
            self.assertIn("foo", asmdict[sig])

    def _test_inspect_sass(self, kernel, name, sass):
        # Shared checks on a SASS listing. ``kernel`` is not used here; it is
        # kept in the signature so existing callers keep working.

        # Ensure function appears in output. The listing is split on any
        # whitespace, so a single token has to contain both '.text' and the
        # function name.
        seen_function = any(
            '.text' in token and name in token for token in sass.split()
        )
        self.assertTrue(seen_function)

        self.assertRegex(sass, r'//## File ".*/test_inspect.py", line [0-9]')

        # Some instructions common to all supported architectures that should
        # appear in the output
        self.assertIn('S2R', sass)   # Special register to register
        self.assertIn('BRA', sass)   # Branch
        self.assertIn('EXIT', sass)  # Exit program

    @skip_without_nvdisasm('nvdisasm needed for inspect_sass()')
    def test_inspect_sass_eager(self):
        # SASS inspection of a kernel compiled with an explicit signature.
        sig = (float32[::1], int32[::1])

        @cuda.jit(sig, lineinfo=True)
        def add(x, y):
            i = cuda.grid(1)
            if i < len(x):
                x[i] += y[i]

        self._test_inspect_sass(add, 'add', add.inspect_sass(sig))

    @skip_without_nvdisasm('nvdisasm needed for inspect_sass()')
    def test_inspect_sass_lazy(self):
        # SASS inspection of a kernel that is only compiled by launching it.
        @cuda.jit(lineinfo=True)
        def add(x, y):
            i = cuda.grid(1)
            if i < len(x):
                x[i] += y[i]

        x = np.arange(10).astype(np.int32)
        y = np.arange(10).astype(np.float32)
        add[1, 10](x, y)

        # Must match the argument types of the launch above.
        signature = (int32[::1], float32[::1])
        self._test_inspect_sass(add, 'add', add.inspect_sass(signature))

    @skip_with_nvdisasm('Missing nvdisasm exception only generated when it is '
                        'not present')
    def test_inspect_sass_nvdisasm_missing(self):
        # Without nvdisasm, inspect_sass() should fail with a clear message.
        @cuda.jit((float32[::1],))
        def f(x):
            x[0] = 0

        with self.assertRaises(RuntimeError) as raises:
            f.inspect_sass()

        self.assertIn('nvdisasm has not been found', str(raises.exception))

    @skip_without_nvdisasm('nvdisasm needed for inspect_sass_cfg()')
    def test_inspect_sass_cfg(self):
        # The control flow graph output should look like a ``digraph`` block.
        sig = (float32[::1], int32[::1])

        @cuda.jit(sig)
        def add(x, y):
            i = cuda.grid(1)
            if i < len(x):
                x[i] += y[i]

        self.assertRegex(
            add.inspect_sass_cfg(signature=sig),
            r'digraph\s*\w\s*{(.|\n)*\n}'
        )
|
162
|
+
|
163
|
+
|
164
|
+
if __name__ == '__main__':
    # Allow this test module to be run directly as a script.
    unittest.main()
|