numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,294 @@
|
|
1
|
+
import unittest
|
2
|
+
import threading
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
|
6
|
+
from numba import cuda, config
|
7
|
+
from numba.cuda.cudadrv.linkable_code import CUSource
|
8
|
+
from numba.cuda.testing import CUDATestCase, ContextResettingTestCase
|
9
|
+
|
10
|
+
from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD
|
11
|
+
|
12
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
13
|
+
from cuda.cuda import CUmodule as cu_module_type
|
14
|
+
else:
|
15
|
+
from numba.cuda.cudadrv.drvapi import cu_module as cu_module_type
|
16
|
+
|
17
|
+
|
18
|
+
def wipe_all_modules_in_context():
|
19
|
+
"""Cleans all modules reference held by current context.
|
20
|
+
This simulates the behavior on interpreter shutdown.
|
21
|
+
|
22
|
+
TODO: This is a temp solution until
|
23
|
+
https://github.com/NVIDIA/numba-cuda/issues/171 is implemented.
|
24
|
+
"""
|
25
|
+
ctx = cuda.current_context()
|
26
|
+
ctx.reset()
|
27
|
+
|
28
|
+
|
29
|
+
def get_hashable_handle_value(handle):
|
30
|
+
if not config.CUDA_USE_NVIDIA_BINDING:
|
31
|
+
handle = handle.value
|
32
|
+
return handle
|
33
|
+
|
34
|
+
|
35
|
+
class TestModuleCallbacksBasic(ContextResettingTestCase):
|
36
|
+
def test_basic(self):
|
37
|
+
counter = 0
|
38
|
+
|
39
|
+
def setup(handle):
|
40
|
+
self.assertTrue(isinstance(handle, cu_module_type))
|
41
|
+
nonlocal counter
|
42
|
+
counter += 1
|
43
|
+
|
44
|
+
def teardown(handle):
|
45
|
+
self.assertTrue(isinstance(handle, cu_module_type))
|
46
|
+
nonlocal counter
|
47
|
+
counter -= 1
|
48
|
+
|
49
|
+
lib = CUSource("", setup_callback=setup, teardown_callback=teardown)
|
50
|
+
|
51
|
+
@cuda.jit(link=[lib])
|
52
|
+
def kernel():
|
53
|
+
pass
|
54
|
+
|
55
|
+
self.assertEqual(counter, 0)
|
56
|
+
kernel[1, 1]()
|
57
|
+
self.assertEqual(counter, 1)
|
58
|
+
kernel[1, 1]() # cached
|
59
|
+
self.assertEqual(counter, 1)
|
60
|
+
|
61
|
+
wipe_all_modules_in_context()
|
62
|
+
del kernel
|
63
|
+
self.assertEqual(counter, 0)
|
64
|
+
|
65
|
+
def test_different_argtypes(self):
|
66
|
+
counter = 0
|
67
|
+
setup_seen = set()
|
68
|
+
teardown_seen = set()
|
69
|
+
|
70
|
+
def setup(handle):
|
71
|
+
nonlocal counter, setup_seen
|
72
|
+
counter += 1
|
73
|
+
setup_seen.add(get_hashable_handle_value(handle))
|
74
|
+
|
75
|
+
def teardown(handle):
|
76
|
+
nonlocal counter
|
77
|
+
counter -= 1
|
78
|
+
teardown_seen.add(get_hashable_handle_value(handle))
|
79
|
+
|
80
|
+
lib = CUSource("", setup_callback=setup, teardown_callback=teardown)
|
81
|
+
|
82
|
+
@cuda.jit(link=[lib])
|
83
|
+
def kernel(arg):
|
84
|
+
pass
|
85
|
+
|
86
|
+
self.assertEqual(counter, 0)
|
87
|
+
kernel[1, 1](42) # (int64)->() : module 1
|
88
|
+
self.assertEqual(counter, 1)
|
89
|
+
kernel[1, 1](100) # (int64)->() : module 1, cached
|
90
|
+
self.assertEqual(counter, 1)
|
91
|
+
kernel[1, 1](3.14) # (float64)->() : module 2
|
92
|
+
self.assertEqual(counter, 2)
|
93
|
+
|
94
|
+
wipe_all_modules_in_context()
|
95
|
+
del kernel
|
96
|
+
self.assertEqual(counter, 0)
|
97
|
+
|
98
|
+
self.assertEqual(len(setup_seen), 2)
|
99
|
+
self.assertEqual(len(teardown_seen), 2)
|
100
|
+
|
101
|
+
def test_two_kernels(self):
|
102
|
+
counter = 0
|
103
|
+
setup_seen = set()
|
104
|
+
teardown_seen = set()
|
105
|
+
|
106
|
+
def setup(handle):
|
107
|
+
nonlocal counter, setup_seen
|
108
|
+
counter += 1
|
109
|
+
setup_seen.add(get_hashable_handle_value(handle))
|
110
|
+
|
111
|
+
def teardown(handle):
|
112
|
+
nonlocal counter, teardown_seen
|
113
|
+
counter -= 1
|
114
|
+
teardown_seen.add(get_hashable_handle_value(handle))
|
115
|
+
|
116
|
+
lib = CUSource("", setup_callback=setup, teardown_callback=teardown)
|
117
|
+
|
118
|
+
@cuda.jit(link=[lib])
|
119
|
+
def kernel():
|
120
|
+
pass
|
121
|
+
|
122
|
+
@cuda.jit(link=[lib])
|
123
|
+
def kernel2():
|
124
|
+
pass
|
125
|
+
|
126
|
+
kernel[1, 1]()
|
127
|
+
self.assertEqual(counter, 1)
|
128
|
+
kernel2[1, 1]()
|
129
|
+
self.assertEqual(counter, 2)
|
130
|
+
|
131
|
+
wipe_all_modules_in_context()
|
132
|
+
del kernel
|
133
|
+
self.assertEqual(counter, 0)
|
134
|
+
|
135
|
+
self.assertEqual(len(setup_seen), 2)
|
136
|
+
self.assertEqual(len(teardown_seen), 2)
|
137
|
+
|
138
|
+
|
139
|
+
class TestModuleCallbacksAPICompleteness(CUDATestCase):
|
140
|
+
def test_api(self):
|
141
|
+
def setup(handle):
|
142
|
+
pass
|
143
|
+
|
144
|
+
def teardown(handle):
|
145
|
+
pass
|
146
|
+
|
147
|
+
api_combo = [
|
148
|
+
(setup, teardown),
|
149
|
+
(setup, None),
|
150
|
+
(None, teardown),
|
151
|
+
(None, None),
|
152
|
+
]
|
153
|
+
|
154
|
+
for setup, teardown in api_combo:
|
155
|
+
with self.subTest(setup=setup, teardown=teardown):
|
156
|
+
lib = CUSource(
|
157
|
+
"", setup_callback=setup, teardown_callback=teardown
|
158
|
+
)
|
159
|
+
|
160
|
+
@cuda.jit(link=[lib])
|
161
|
+
def kernel():
|
162
|
+
pass
|
163
|
+
|
164
|
+
kernel[1, 1]()
|
165
|
+
|
166
|
+
|
167
|
+
class TestModuleCallbacks(CUDATestCase):
|
168
|
+
def setUp(self):
|
169
|
+
super().setUp()
|
170
|
+
|
171
|
+
module = """
|
172
|
+
__device__ int num = 0;
|
173
|
+
extern "C"
|
174
|
+
__device__ int get_num(int &retval) {
|
175
|
+
retval = num;
|
176
|
+
return 0;
|
177
|
+
}
|
178
|
+
"""
|
179
|
+
|
180
|
+
def set_forty_two(handle):
|
181
|
+
# Initialize 42 to global variable `num`
|
182
|
+
res, dptr, size = cuModuleGetGlobal(
|
183
|
+
get_hashable_handle_value(handle), "num".encode()
|
184
|
+
)
|
185
|
+
|
186
|
+
arr = np.array([42], np.int32)
|
187
|
+
cuMemcpyHtoD(dptr, arr.ctypes.data, size)
|
188
|
+
|
189
|
+
self.lib = CUSource(
|
190
|
+
module, setup_callback=set_forty_two, teardown_callback=None
|
191
|
+
)
|
192
|
+
|
193
|
+
def test_decldevice_arg(self):
|
194
|
+
get_num = cuda.declare_device("get_num", "int32()", link=[self.lib])
|
195
|
+
|
196
|
+
@cuda.jit
|
197
|
+
def kernel(arr):
|
198
|
+
arr[0] = get_num()
|
199
|
+
|
200
|
+
arr = np.zeros(1, np.int32)
|
201
|
+
kernel[1, 1](arr)
|
202
|
+
self.assertEqual(arr[0], 42)
|
203
|
+
|
204
|
+
def test_jitarg(self):
|
205
|
+
get_num = cuda.declare_device("get_num", "int32()")
|
206
|
+
|
207
|
+
@cuda.jit(link=[self.lib])
|
208
|
+
def kernel(arr):
|
209
|
+
arr[0] = get_num()
|
210
|
+
|
211
|
+
arr = np.zeros(1, np.int32)
|
212
|
+
kernel[1, 1](arr)
|
213
|
+
self.assertEqual(arr[0], 42)
|
214
|
+
|
215
|
+
|
216
|
+
class TestMultithreadedCallbacks(CUDATestCase):
|
217
|
+
def test_concurrent_initialization(self):
|
218
|
+
seen_mods = set()
|
219
|
+
max_seen_mods = 0
|
220
|
+
|
221
|
+
def setup(mod):
|
222
|
+
nonlocal seen_mods, max_seen_mods
|
223
|
+
seen_mods.add(get_hashable_handle_value(mod))
|
224
|
+
max_seen_mods = max(max_seen_mods, len(seen_mods))
|
225
|
+
|
226
|
+
def teardown(mod):
|
227
|
+
nonlocal seen_mods
|
228
|
+
# Raises an error if the module is not found in the seen_mods
|
229
|
+
seen_mods.remove(get_hashable_handle_value(mod))
|
230
|
+
|
231
|
+
lib = CUSource("", setup_callback=setup, teardown_callback=teardown)
|
232
|
+
|
233
|
+
@cuda.jit(link=[lib])
|
234
|
+
def kernel():
|
235
|
+
pass
|
236
|
+
|
237
|
+
def concurrent_compilation_launch(kernel):
|
238
|
+
kernel[1, 1]()
|
239
|
+
|
240
|
+
threads = [
|
241
|
+
threading.Thread(
|
242
|
+
target=concurrent_compilation_launch, args=(kernel,)
|
243
|
+
)
|
244
|
+
for _ in range(4)
|
245
|
+
]
|
246
|
+
for t in threads:
|
247
|
+
t.start()
|
248
|
+
for t in threads:
|
249
|
+
t.join()
|
250
|
+
|
251
|
+
wipe_all_modules_in_context()
|
252
|
+
self.assertEqual(len(seen_mods), 0)
|
253
|
+
self.assertEqual(max_seen_mods, 1) # one moduled shared across threads
|
254
|
+
|
255
|
+
def test_concurrent_initialization_different_args(self):
|
256
|
+
seen_mods = set()
|
257
|
+
max_seen_mods = 0
|
258
|
+
|
259
|
+
def setup(mod):
|
260
|
+
nonlocal seen_mods, max_seen_mods
|
261
|
+
seen_mods.add(get_hashable_handle_value(mod))
|
262
|
+
max_seen_mods = max(max_seen_mods, len(seen_mods))
|
263
|
+
|
264
|
+
def teardown(mod):
|
265
|
+
nonlocal seen_mods
|
266
|
+
seen_mods.remove(get_hashable_handle_value(mod))
|
267
|
+
|
268
|
+
lib = CUSource("", setup_callback=setup, teardown_callback=teardown)
|
269
|
+
|
270
|
+
@cuda.jit(link=[lib])
|
271
|
+
def kernel(a):
|
272
|
+
pass
|
273
|
+
|
274
|
+
def concurrent_compilation_launch():
|
275
|
+
kernel[1, 1](42) # (int64)->() : module 1
|
276
|
+
kernel[1, 1](9) # (int64)->() : module 1 from cache
|
277
|
+
kernel[1, 1](3.14) # (float64)->() : module 2
|
278
|
+
|
279
|
+
threads = [
|
280
|
+
threading.Thread(target=concurrent_compilation_launch)
|
281
|
+
for _ in range(4)
|
282
|
+
]
|
283
|
+
for t in threads:
|
284
|
+
t.start()
|
285
|
+
for t in threads:
|
286
|
+
t.join()
|
287
|
+
|
288
|
+
wipe_all_modules_in_context()
|
289
|
+
assert len(seen_mods) == 0
|
290
|
+
self.assertEqual(max_seen_mods, 2) # two modules shared across threads
|
291
|
+
|
292
|
+
|
293
|
+
if __name__ == "__main__":
|
294
|
+
unittest.main()
|
@@ -1,8 +1,11 @@
|
|
1
1
|
import multiprocessing as mp
|
2
2
|
import traceback
|
3
3
|
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
-
from numba.cuda.testing import (
|
5
|
-
|
4
|
+
from numba.cuda.testing import (
|
5
|
+
skip_on_cudasim,
|
6
|
+
skip_under_cuda_memcheck,
|
7
|
+
skip_if_mvc_libraries_unavailable,
|
8
|
+
)
|
6
9
|
from numba.tests.support import linux_only
|
7
10
|
|
8
11
|
|
@@ -24,7 +27,7 @@ def child_test_wrapper(result_queue):
|
|
24
27
|
output = child_test()
|
25
28
|
success = True
|
26
29
|
# Catch anything raised so it can be propagated
|
27
|
-
except:
|
30
|
+
except: # noqa: E722
|
28
31
|
output = traceback.format_exc()
|
29
32
|
success = False
|
30
33
|
|
@@ -32,13 +35,13 @@ def child_test_wrapper(result_queue):
|
|
32
35
|
|
33
36
|
|
34
37
|
@linux_only
|
35
|
-
@skip_under_cuda_memcheck(
|
36
|
-
@skip_on_cudasim(
|
38
|
+
@skip_under_cuda_memcheck("May hang CUDA memcheck")
|
39
|
+
@skip_on_cudasim("Simulator does not require or implement MVC")
|
37
40
|
@skip_if_mvc_libraries_unavailable
|
38
41
|
class TestMinorVersionCompatibility(CUDATestCase):
|
39
42
|
def test_mvc(self):
|
40
43
|
# Run test with Minor Version Compatibility enabled in a child process
|
41
|
-
ctx = mp.get_context(
|
44
|
+
ctx = mp.get_context("spawn")
|
42
45
|
result_queue = ctx.Queue()
|
43
46
|
proc = ctx.Process(target=child_test_wrapper, args=(result_queue,))
|
44
47
|
proc.start()
|
@@ -50,5 +53,5 @@ class TestMinorVersionCompatibility(CUDATestCase):
|
|
50
53
|
self.fail(output)
|
51
54
|
|
52
55
|
|
53
|
-
if __name__ ==
|
56
|
+
if __name__ == "__main__":
|
54
57
|
unittest.main()
|
@@ -10,6 +10,7 @@ from numba.tests.support import run_in_subprocess, override_config
|
|
10
10
|
|
11
11
|
try:
|
12
12
|
import pynvjitlink # noqa: F401
|
13
|
+
|
13
14
|
PYNVJITLINK_INSTALLED = True
|
14
15
|
except ImportError:
|
15
16
|
PYNVJITLINK_INSTALLED = False
|
@@ -52,7 +53,7 @@ if TEST_BIN_DIR:
|
|
52
53
|
|
53
54
|
@unittest.skipIf(
|
54
55
|
not config.CUDA_ENABLE_PYNVJITLINK or not TEST_BIN_DIR,
|
55
|
-
"pynvjitlink not enabled"
|
56
|
+
"pynvjitlink not enabled",
|
56
57
|
)
|
57
58
|
@skip_on_cudasim("Linking unsupported in the simulator")
|
58
59
|
class TestLinker(CUDATestCase):
|
@@ -85,7 +86,6 @@ class TestLinker(CUDATestCase):
|
|
85
86
|
PyNvJitLinker(cc=0)
|
86
87
|
|
87
88
|
def test_nvjitlink_ptx_compile_options(self):
|
88
|
-
|
89
89
|
max_registers = (None, 32)
|
90
90
|
lineinfo = (False, True)
|
91
91
|
lto = (False, True)
|
@@ -190,7 +190,7 @@ class TestLinker(CUDATestCase):
|
|
190
190
|
files = [
|
191
191
|
test_device_functions_cu,
|
192
192
|
test_device_functions_ltoir,
|
193
|
-
test_device_functions_fatbin_multi
|
193
|
+
test_device_functions_fatbin_multi,
|
194
194
|
]
|
195
195
|
|
196
196
|
config.DUMP_ASSEMBLY = True
|
@@ -228,7 +228,7 @@ class TestLinker(CUDATestCase):
|
|
228
228
|
for file in files:
|
229
229
|
with self.subTest(file=file):
|
230
230
|
with warnings.catch_warnings(record=True) as w:
|
231
|
-
with contextlib.redirect_stdout(None):
|
231
|
+
with contextlib.redirect_stdout(None): # suppress other PTX
|
232
232
|
sig = "uint32(uint32, uint32)"
|
233
233
|
add_from_numba = cuda.declare_device(
|
234
234
|
"add_from_numba", sig
|
@@ -243,8 +243,11 @@ class TestLinker(CUDATestCase):
|
|
243
243
|
assert result[0] == 3
|
244
244
|
|
245
245
|
assert len(w) == 1
|
246
|
-
self.assertIn(
|
247
|
-
|
246
|
+
self.assertIn(
|
247
|
+
"it is not optimizable at link time, and "
|
248
|
+
"`ignore_nonlto == True`",
|
249
|
+
str(w[0].message),
|
250
|
+
)
|
248
251
|
|
249
252
|
config.DUMP_ASSEMBLY = False
|
250
253
|
|
@@ -262,7 +265,7 @@ class TestLinker(CUDATestCase):
|
|
262
265
|
|
263
266
|
@unittest.skipIf(
|
264
267
|
not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
|
265
|
-
reason="pynvjitlink not enabled"
|
268
|
+
reason="pynvjitlink not enabled",
|
266
269
|
)
|
267
270
|
class TestLinkerUsage(CUDATestCase):
|
268
271
|
"""Test that whether pynvjitlink can be enabled by both environment variable
|
@@ -295,12 +298,12 @@ class TestLinkerUsage(CUDATestCase):
|
|
295
298
|
|
296
299
|
def test_linker_enabled_envvar(self):
|
297
300
|
env = os.environ.copy()
|
298
|
-
env[
|
301
|
+
env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "1"
|
299
302
|
run_in_subprocess(self.src.format(config=""), env=env)
|
300
303
|
|
301
304
|
def test_linker_disabled_envvar(self):
|
302
305
|
env = os.environ.copy()
|
303
|
-
env.pop(
|
306
|
+
env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
|
304
307
|
with self.assertRaisesRegex(
|
305
308
|
AssertionError, "LTO and additional flags require PyNvJitLinker"
|
306
309
|
):
|
@@ -310,19 +313,25 @@ class TestLinkerUsage(CUDATestCase):
|
|
310
313
|
|
311
314
|
def test_linker_enabled_config(self):
|
312
315
|
env = os.environ.copy()
|
313
|
-
env.pop(
|
314
|
-
run_in_subprocess(
|
315
|
-
config="config.CUDA_ENABLE_PYNVJITLINK = True"),
|
316
|
+
env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
|
317
|
+
run_in_subprocess(
|
318
|
+
self.src.format(config="config.CUDA_ENABLE_PYNVJITLINK = True"),
|
319
|
+
env=env,
|
320
|
+
)
|
316
321
|
|
317
322
|
def test_linker_disabled_config(self):
|
318
323
|
env = os.environ.copy()
|
319
|
-
env.pop(
|
324
|
+
env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
|
320
325
|
with override_config("CUDA_ENABLE_PYNVJITLINK", False):
|
321
326
|
with self.assertRaisesRegex(
|
322
327
|
AssertionError, "LTO and additional flags require PyNvJitLinker"
|
323
328
|
):
|
324
|
-
run_in_subprocess(
|
325
|
-
|
329
|
+
run_in_subprocess(
|
330
|
+
self.src.format(
|
331
|
+
config="config.CUDA_ENABLE_PYNVJITLINK = False"
|
332
|
+
),
|
333
|
+
env=env,
|
334
|
+
)
|
326
335
|
|
327
336
|
|
328
337
|
if __name__ == "__main__":
|
@@ -7,7 +7,7 @@ from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
|
|
7
7
|
from numba.cuda.testing import skip_on_cudasim
|
8
8
|
|
9
9
|
|
10
|
-
@skip_on_cudasim(
|
10
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
11
11
|
class TestNvvmDriver(unittest.TestCase):
|
12
12
|
def get_nvvmir(self):
|
13
13
|
versions = NVVM().get_ir_version()
|
@@ -16,9 +16,9 @@ class TestNvvmDriver(unittest.TestCase):
|
|
16
16
|
|
17
17
|
def test_nvvm_compile_simple(self):
|
18
18
|
nvvmir = self.get_nvvmir()
|
19
|
-
ptx = nvvm.compile_ir(nvvmir).decode(
|
20
|
-
self.assertTrue(
|
21
|
-
self.assertTrue(
|
19
|
+
ptx = nvvm.compile_ir(nvvmir).decode("utf8")
|
20
|
+
self.assertTrue("simple" in ptx)
|
21
|
+
self.assertTrue("ave" in ptx)
|
22
22
|
|
23
23
|
def test_nvvm_compile_nullary_option(self):
|
24
24
|
# Tests compilation with an option that doesn't take an argument
|
@@ -34,7 +34,7 @@ class TestNvvmDriver(unittest.TestCase):
|
|
34
34
|
|
35
35
|
# Verify we correctly passed the option by checking if we got LTOIR
|
36
36
|
# from NVVM (by looking for the expected magic number for LTOIR)
|
37
|
-
self.assertEqual(ltoir[:4], b
|
37
|
+
self.assertEqual(ltoir[:4], b"\xed\x43\x4e\x7f")
|
38
38
|
|
39
39
|
def test_nvvm_bad_option(self):
|
40
40
|
# Ensure that unsupported / non-existent options are reported as such
|
@@ -45,36 +45,37 @@ class TestNvvmDriver(unittest.TestCase):
|
|
45
45
|
|
46
46
|
def test_nvvm_from_llvm(self):
|
47
47
|
m = ir.Module("test_nvvm_from_llvm")
|
48
|
-
m.triple =
|
48
|
+
m.triple = "nvptx64-nvidia-cuda"
|
49
49
|
nvvm.add_ir_version(m)
|
50
50
|
fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
|
51
|
-
kernel = ir.Function(m, fty, name=
|
52
|
-
bldr = ir.IRBuilder(kernel.append_basic_block(
|
51
|
+
kernel = ir.Function(m, fty, name="mycudakernel")
|
52
|
+
bldr = ir.IRBuilder(kernel.append_basic_block("entry"))
|
53
53
|
bldr.ret_void()
|
54
54
|
nvvm.set_cuda_kernel(kernel)
|
55
55
|
|
56
56
|
m.data_layout = NVVM().data_layout
|
57
|
-
ptx = nvvm.compile_ir(str(m)).decode(
|
58
|
-
self.assertTrue(
|
59
|
-
self.assertTrue(
|
57
|
+
ptx = nvvm.compile_ir(str(m)).decode("utf8")
|
58
|
+
self.assertTrue("mycudakernel" in ptx)
|
59
|
+
self.assertTrue(".address_size 64" in ptx)
|
60
60
|
|
61
61
|
def test_used_list(self):
|
62
62
|
# Construct a module
|
63
63
|
m = ir.Module("test_used_list")
|
64
|
-
m.triple =
|
64
|
+
m.triple = "nvptx64-nvidia-cuda"
|
65
65
|
m.data_layout = NVVM().data_layout
|
66
66
|
nvvm.add_ir_version(m)
|
67
67
|
|
68
68
|
# Add a function and mark it as a kernel
|
69
69
|
fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
|
70
|
-
kernel = ir.Function(m, fty, name=
|
71
|
-
bldr = ir.IRBuilder(kernel.append_basic_block(
|
70
|
+
kernel = ir.Function(m, fty, name="mycudakernel")
|
71
|
+
bldr = ir.IRBuilder(kernel.append_basic_block("entry"))
|
72
72
|
bldr.ret_void()
|
73
73
|
nvvm.set_cuda_kernel(kernel)
|
74
74
|
|
75
75
|
# Verify that the used list was correctly constructed
|
76
|
-
used_lines = [
|
77
|
-
|
76
|
+
used_lines = [
|
77
|
+
line for line in str(m).splitlines() if "llvm.used" in line
|
78
|
+
]
|
78
79
|
msg = 'Expected exactly one @"llvm.used" array'
|
79
80
|
self.assertEqual(len(used_lines), 1, msg)
|
80
81
|
|
@@ -93,70 +94,71 @@ class TestNvvmDriver(unittest.TestCase):
|
|
93
94
|
m.triple = "unknown-unknown-unknown"
|
94
95
|
m.data_layout = NVVM().data_layout
|
95
96
|
nvvm.add_ir_version(m)
|
96
|
-
with self.assertRaisesRegex(NvvmError,
|
97
|
+
with self.assertRaisesRegex(NvvmError, "Invalid target triple"):
|
97
98
|
nvvm.compile_ir(str(m))
|
98
99
|
|
99
100
|
def _test_nvvm_support(self, arch):
|
100
|
-
compute_xx =
|
101
|
+
compute_xx = "compute_{0}{1}".format(*arch)
|
101
102
|
nvvmir = self.get_nvvmir()
|
102
|
-
ptx = nvvm.compile_ir(
|
103
|
-
|
103
|
+
ptx = nvvm.compile_ir(
|
104
|
+
nvvmir, arch=compute_xx, ftz=1, prec_sqrt=0, prec_div=0
|
105
|
+
).decode("utf8")
|
104
106
|
self.assertIn(".target sm_{0}{1}".format(*arch), ptx)
|
105
|
-
self.assertIn(
|
106
|
-
self.assertIn(
|
107
|
+
self.assertIn("simple", ptx)
|
108
|
+
self.assertIn("ave", ptx)
|
107
109
|
|
108
110
|
def test_nvvm_support(self):
|
109
|
-
"""Test supported CC by NVVM
|
110
|
-
"""
|
111
|
+
"""Test supported CC by NVVM"""
|
111
112
|
for arch in nvvm.get_supported_ccs():
|
112
113
|
self._test_nvvm_support(arch=arch)
|
113
114
|
|
114
115
|
def test_nvvm_warning(self):
|
115
116
|
m = ir.Module("test_nvvm_warning")
|
116
|
-
m.triple =
|
117
|
+
m.triple = "nvptx64-nvidia-cuda"
|
117
118
|
m.data_layout = NVVM().data_layout
|
118
119
|
nvvm.add_ir_version(m)
|
119
120
|
|
120
121
|
fty = ir.FunctionType(ir.VoidType(), [])
|
121
|
-
kernel = ir.Function(m, fty, name=
|
122
|
-
builder = ir.IRBuilder(kernel.append_basic_block(
|
122
|
+
kernel = ir.Function(m, fty, name="inlinekernel")
|
123
|
+
builder = ir.IRBuilder(kernel.append_basic_block("entry"))
|
123
124
|
builder.ret_void()
|
124
125
|
nvvm.set_cuda_kernel(kernel)
|
125
126
|
|
126
127
|
# Add the noinline attribute to trigger NVVM to generate a warning
|
127
|
-
kernel.attributes.add(
|
128
|
+
kernel.attributes.add("noinline")
|
128
129
|
|
129
130
|
with warnings.catch_warnings(record=True) as w:
|
130
131
|
nvvm.compile_ir(str(m))
|
131
132
|
|
132
133
|
self.assertEqual(len(w), 1)
|
133
|
-
self.assertIn(
|
134
|
+
self.assertIn("overriding noinline attribute", str(w[0]))
|
134
135
|
|
135
136
|
|
136
|
-
@skip_on_cudasim(
|
137
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
137
138
|
class TestArchOption(unittest.TestCase):
|
138
139
|
def test_get_arch_option(self):
|
139
140
|
# Test returning the nearest lowest arch.
|
140
|
-
self.assertEqual(nvvm.get_arch_option(5, 3),
|
141
|
-
self.assertEqual(nvvm.get_arch_option(7, 5),
|
142
|
-
self.assertEqual(nvvm.get_arch_option(7, 7),
|
141
|
+
self.assertEqual(nvvm.get_arch_option(5, 3), "compute_53")
|
142
|
+
self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75")
|
143
|
+
self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75")
|
143
144
|
# Test known arch.
|
144
145
|
supported_cc = nvvm.get_supported_ccs()
|
145
146
|
for arch in supported_cc:
|
146
|
-
self.assertEqual(nvvm.get_arch_option(*arch),
|
147
|
-
self.assertEqual(
|
148
|
-
|
147
|
+
self.assertEqual(nvvm.get_arch_option(*arch), "compute_%d%d" % arch)
|
148
|
+
self.assertEqual(
|
149
|
+
nvvm.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1]
|
150
|
+
)
|
149
151
|
|
150
152
|
|
151
|
-
@skip_on_cudasim(
|
153
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
152
154
|
class TestLibDevice(unittest.TestCase):
|
153
155
|
def test_libdevice_load(self):
|
154
156
|
# Test that constructing LibDevice gives a bitcode file
|
155
157
|
libdevice = LibDevice()
|
156
|
-
self.assertEqual(libdevice.bc[:4], b
|
158
|
+
self.assertEqual(libdevice.bc[:4], b"BC\xc0\xde")
|
157
159
|
|
158
160
|
|
159
|
-
nvvmir_generic =
|
161
|
+
nvvmir_generic = """\
|
160
162
|
target triple="nvptx64-nvidia-cuda"
|
161
163
|
target datalayout = "{data_layout}"
|
162
164
|
|
@@ -194,8 +196,8 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
|
|
194
196
|
!2 = !{{void (i32*)* @simple, !"kernel", i32 1}}
|
195
197
|
|
196
198
|
@"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i32*)* @simple to i8*)], section "llvm.metadata"
|
197
|
-
|
199
|
+
""" # noqa: E501
|
198
200
|
|
199
201
|
|
200
|
-
if __name__ ==
|
202
|
+
if __name__ == "__main__":
|
201
203
|
unittest.main()
|
@@ -6,7 +6,6 @@ from numba.cuda.testing import unittest, ContextResettingTestCase
|
|
6
6
|
|
7
7
|
|
8
8
|
class TestPinned(ContextResettingTestCase):
|
9
|
-
|
10
9
|
def _run_copies(self, A):
|
11
10
|
A0 = np.copy(A)
|
12
11
|
|
@@ -20,8 +19,8 @@ class TestPinned(ContextResettingTestCase):
|
|
20
19
|
|
21
20
|
def test_pinned(self):
|
22
21
|
machine = platform.machine()
|
23
|
-
if machine.startswith(
|
24
|
-
count = 262144
|
22
|
+
if machine.startswith("arm") or machine.startswith("aarch64"):
|
23
|
+
count = 262144 # 2MB
|
25
24
|
else:
|
26
25
|
count = 2097152 # 16MB
|
27
26
|
A = np.arange(count)
|
@@ -29,9 +28,9 @@ class TestPinned(ContextResettingTestCase):
|
|
29
28
|
self._run_copies(A)
|
30
29
|
|
31
30
|
def test_unpinned(self):
|
32
|
-
A = np.arange(2 * 1024 * 1024)
|
31
|
+
A = np.arange(2 * 1024 * 1024) # 16 MB
|
33
32
|
self._run_copies(A)
|
34
33
|
|
35
34
|
|
36
|
-
if __name__ ==
|
35
|
+
if __name__ == "__main__":
|
37
36
|
unittest.main()
|
@@ -4,7 +4,7 @@ from numba import cuda
|
|
4
4
|
from numba.cuda.testing import skip_on_cudasim
|
5
5
|
|
6
6
|
|
7
|
-
@skip_on_cudasim(
|
7
|
+
@skip_on_cudasim("CUDA Profiler unsupported in the simulator")
|
8
8
|
class TestProfiler(ContextResettingTestCase):
|
9
9
|
def test_profiling(self):
|
10
10
|
with cuda.profiling():
|
@@ -16,5 +16,5 @@ class TestProfiler(ContextResettingTestCase):
|
|
16
16
|
del a
|
17
17
|
|
18
18
|
|
19
|
-
if __name__ ==
|
19
|
+
if __name__ == "__main__":
|
20
20
|
unittest.main()
|