numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/target.py
CHANGED
@@ -35,19 +35,21 @@ class CUDATypingContext(typing.BaseContext):
|
|
35
35
|
def resolve_value_type(self, val):
|
36
36
|
# treat other dispatcher object as another device function
|
37
37
|
from numba.cuda.dispatcher import CUDADispatcher
|
38
|
-
|
39
|
-
|
38
|
+
|
39
|
+
if isinstance(val, Dispatcher) and not isinstance(val, CUDADispatcher):
|
40
40
|
try:
|
41
41
|
# use cached device function
|
42
42
|
val = val.__dispatcher
|
43
43
|
except AttributeError:
|
44
44
|
if not val._can_compile:
|
45
|
-
raise ValueError(
|
46
|
-
|
45
|
+
raise ValueError(
|
46
|
+
"using cpu function on device "
|
47
|
+
"but its compilation is disabled"
|
48
|
+
)
|
47
49
|
targetoptions = val.targetoptions.copy()
|
48
|
-
targetoptions[
|
49
|
-
targetoptions[
|
50
|
-
targetoptions[
|
50
|
+
targetoptions["device"] = True
|
51
|
+
targetoptions["debug"] = targetoptions.get("debug", False)
|
52
|
+
targetoptions["opt"] = targetoptions.get("opt", True)
|
51
53
|
disp = CUDADispatcher(val.py_func, targetoptions)
|
52
54
|
# cache the device function for future use and to avoid
|
53
55
|
# duplicated copy of the same function.
|
@@ -57,18 +59,47 @@ class CUDATypingContext(typing.BaseContext):
|
|
57
59
|
# continue with parent logic
|
58
60
|
return super(CUDATypingContext, self).resolve_value_type(val)
|
59
61
|
|
62
|
+
def can_convert(self, fromty, toty):
|
63
|
+
"""
|
64
|
+
Check whether conversion is possible from *fromty* to *toty*.
|
65
|
+
If successful, return a numba.typeconv.Conversion instance;
|
66
|
+
otherwise None is returned.
|
67
|
+
"""
|
68
|
+
|
69
|
+
# This implementation works around the issue addressed in Numba PR
|
70
|
+
# #10047, "Fix IntEnumMember.can_convert_to() when no conversions
|
71
|
+
# found", https://github.com/numba/numba/pull/10047.
|
72
|
+
#
|
73
|
+
# This should be gated on the version of Numba that the fix is
|
74
|
+
# incorporated into, and eventually removed when the minimum supported
|
75
|
+
# Numba version includes the fix.
|
76
|
+
|
77
|
+
try:
|
78
|
+
return super().can_convert(fromty, toty)
|
79
|
+
except TypeError:
|
80
|
+
if isinstance(fromty, types.IntEnumMember):
|
81
|
+
# IntEnumMember fails to correctly handle impossible
|
82
|
+
# conversions - in this scenario the correct thing to do is to
|
83
|
+
# return None to signal that the conversion was not possible
|
84
|
+
return None
|
85
|
+
else:
|
86
|
+
# Any failure involving conversion from a non-IntEnumMember is
|
87
|
+
# almost certainly a real and separate issue
|
88
|
+
raise
|
89
|
+
|
90
|
+
|
60
91
|
# -----------------------------------------------------------------------------
|
61
92
|
# Implementation
|
62
93
|
|
63
94
|
|
64
|
-
VALID_CHARS = re.compile(r
|
95
|
+
VALID_CHARS = re.compile(r"[^a-z0-9]", re.I)
|
65
96
|
|
66
97
|
|
67
98
|
class CUDATargetContext(BaseContext):
|
68
99
|
implement_powi_as_math_call = True
|
69
100
|
strict_alignment = True
|
70
101
|
|
71
|
-
def __init__(self, typingctx, target=
|
102
|
+
def __init__(self, typingctx, target="cuda"):
|
72
103
|
super().__init__(typingctx, target)
|
73
104
|
self.data_model_manager = cuda_data_manager.chain(
|
74
105
|
datamodel.default_manager
|
@@ -76,7 +107,7 @@ class CUDATargetContext(BaseContext):
|
|
76
107
|
|
77
108
|
@property
|
78
109
|
def enable_nrt(self):
|
79
|
-
return getattr(config,
|
110
|
+
return getattr(config, "CUDA_ENABLE_NRT", False)
|
80
111
|
|
81
112
|
@property
|
82
113
|
def DIBuilder(self):
|
@@ -98,18 +129,17 @@ class CUDATargetContext(BaseContext):
|
|
98
129
|
def load_additional_registries(self):
|
99
130
|
# side effect of import needed for numba.cpython.*, the builtins
|
100
131
|
# registry is updated at import time.
|
101
|
-
from numba.cpython import numbers, tupleobj, slicing
|
102
|
-
from numba.cpython import rangeobj, iterators, enumimpl
|
103
|
-
from numba.cpython import unicode, charseq
|
132
|
+
from numba.cpython import numbers, tupleobj, slicing # noqa: F401
|
133
|
+
from numba.cpython import rangeobj, iterators, enumimpl # noqa: F401
|
134
|
+
from numba.cpython import unicode, charseq # noqa: F401
|
104
135
|
from numba.cpython import cmathimpl
|
105
136
|
from numba.misc import cffiimpl
|
106
|
-
from numba.np import arrayobj
|
107
|
-
from numba.np import npdatetime
|
108
|
-
from . import
|
109
|
-
|
110
|
-
)
|
137
|
+
from numba.np import arrayobj # noqa: F401
|
138
|
+
from numba.np import npdatetime # noqa: F401
|
139
|
+
from . import cudaimpl, printimpl, libdeviceimpl, mathimpl, vector_types
|
140
|
+
|
111
141
|
# fix for #8940
|
112
|
-
from numba.np.unsafe import ndarray
|
142
|
+
from numba.np.unsafe import ndarray # noqa F401
|
113
143
|
|
114
144
|
self.install_registry(cudaimpl.registry)
|
115
145
|
self.install_registry(cffiimpl.registry)
|
@@ -136,10 +166,18 @@ class CUDATargetContext(BaseContext):
|
|
136
166
|
These include threadIdx, blockDim, etc.
|
137
167
|
"""
|
138
168
|
from numba import cuda
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
169
|
+
|
170
|
+
nonconsts = (
|
171
|
+
"threadIdx",
|
172
|
+
"blockDim",
|
173
|
+
"blockIdx",
|
174
|
+
"gridDim",
|
175
|
+
"laneid",
|
176
|
+
"warpsize",
|
177
|
+
)
|
178
|
+
nonconsts_with_mod = tuple(
|
179
|
+
[(types.Module(cuda), nc) for nc in nonconsts]
|
180
|
+
)
|
143
181
|
return nonconsts_with_mod
|
144
182
|
|
145
183
|
@cached_property
|
@@ -147,8 +185,9 @@ class CUDATargetContext(BaseContext):
|
|
147
185
|
return CUDACallConv(self)
|
148
186
|
|
149
187
|
def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
|
150
|
-
return itanium_mangler.mangle(
|
151
|
-
|
188
|
+
return itanium_mangler.mangle(
|
189
|
+
name, argtypes, abi_tags=abi_tags, uid=uid
|
190
|
+
)
|
152
191
|
|
153
192
|
def make_constant_array(self, builder, aryty, arr):
|
154
193
|
"""
|
@@ -160,15 +199,16 @@ class CUDATargetContext(BaseContext):
|
|
160
199
|
|
161
200
|
constvals = [
|
162
201
|
self.get_constant(types.byte, i)
|
163
|
-
for i in iter(arr.tobytes(order=
|
202
|
+
for i in iter(arr.tobytes(order="A"))
|
164
203
|
]
|
165
204
|
constaryty = ir.ArrayType(ir.IntType(8), len(constvals))
|
166
205
|
constary = ir.Constant(constaryty, constvals)
|
167
206
|
|
168
207
|
addrspace = nvvm.ADDRSPACE_CONSTANT
|
169
|
-
gv = cgutils.add_global_variable(
|
170
|
-
|
171
|
-
|
208
|
+
gv = cgutils.add_global_variable(
|
209
|
+
lmod, constary.type, "_cudapy_cmem", addrspace=addrspace
|
210
|
+
)
|
211
|
+
gv.linkage = "internal"
|
172
212
|
gv.global_constant = True
|
173
213
|
gv.initializer = constary
|
174
214
|
|
@@ -179,17 +219,21 @@ class CUDATargetContext(BaseContext):
|
|
179
219
|
|
180
220
|
# Convert to generic address-space
|
181
221
|
ptrty = ir.PointerType(ir.IntType(8))
|
182
|
-
genptr = builder.addrspacecast(gv, ptrty,
|
222
|
+
genptr = builder.addrspacecast(gv, ptrty, "generic")
|
183
223
|
|
184
224
|
# Create array object
|
185
225
|
ary = self.make_array(aryty)(self, builder)
|
186
226
|
kshape = [self.get_constant(types.intp, s) for s in arr.shape]
|
187
227
|
kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
|
188
|
-
self.populate_array(
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
228
|
+
self.populate_array(
|
229
|
+
ary,
|
230
|
+
data=builder.bitcast(genptr, ary.data.type),
|
231
|
+
shape=kshape,
|
232
|
+
strides=kstrides,
|
233
|
+
itemsize=ary.itemsize,
|
234
|
+
parent=ary.parent,
|
235
|
+
meminfo=None,
|
236
|
+
)
|
193
237
|
|
194
238
|
return ary._getvalue()
|
195
239
|
|
@@ -199,15 +243,17 @@ class CUDATargetContext(BaseContext):
|
|
199
243
|
addrspace.
|
200
244
|
"""
|
201
245
|
text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00")
|
202
|
-
name =
|
203
|
-
|
246
|
+
name = "$".join(
|
247
|
+
["__conststring__", itanium_mangler.mangle_identifier(string)]
|
248
|
+
)
|
204
249
|
# Try to reuse existing global
|
205
250
|
gv = mod.globals.get(name)
|
206
251
|
if gv is None:
|
207
252
|
# Not defined yet
|
208
|
-
gv = cgutils.add_global_variable(
|
209
|
-
|
210
|
-
|
253
|
+
gv = cgutils.add_global_variable(
|
254
|
+
mod, text.type, name, addrspace=nvvm.ADDRSPACE_CONSTANT
|
255
|
+
)
|
256
|
+
gv.linkage = "internal"
|
211
257
|
gv.global_constant = True
|
212
258
|
gv.initializer = text
|
213
259
|
|
@@ -225,11 +271,10 @@ class CUDATargetContext(BaseContext):
|
|
225
271
|
lmod = builder.module
|
226
272
|
gv = self.insert_const_string(lmod, string)
|
227
273
|
charptrty = ir.PointerType(ir.IntType(8))
|
228
|
-
return builder.addrspacecast(gv, charptrty,
|
274
|
+
return builder.addrspacecast(gv, charptrty, "generic")
|
229
275
|
|
230
276
|
def optimize_function(self, func):
|
231
|
-
"""Run O1 function passes
|
232
|
-
"""
|
277
|
+
"""Run O1 function passes"""
|
233
278
|
pass
|
234
279
|
## XXX skipped for now
|
235
280
|
# fpm = lp.FunctionPassManager.new(func.module)
|
@@ -266,8 +311,9 @@ class CUDACABICallConv(BaseCallConv):
|
|
266
311
|
def return_value(self, builder, retval):
|
267
312
|
return builder.ret(retval)
|
268
313
|
|
269
|
-
def return_user_exc(
|
270
|
-
|
314
|
+
def return_user_exc(
|
315
|
+
self, builder, exc, exc_args=None, loc=None, func_name=None
|
316
|
+
):
|
271
317
|
msg = "Python exceptions are unsupported in the CUDA C/C++ ABI"
|
272
318
|
raise NotImplementedError(msg)
|
273
319
|
|
@@ -290,8 +336,7 @@ class CUDACABICallConv(BaseCallConv):
|
|
290
336
|
"""
|
291
337
|
assert not noalias
|
292
338
|
arginfo = self._get_arg_packer(fe_argtypes)
|
293
|
-
arginfo.assign_names(self.get_arguments(fn),
|
294
|
-
['arg.' + a for a in args])
|
339
|
+
arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args])
|
295
340
|
|
296
341
|
def get_arguments(self, func):
|
297
342
|
"""
|
numba_cuda/numba/cuda/testing.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
11
11
|
import unittest
|
12
12
|
|
13
13
|
numba_cuda_dir = Path(__file__).parent
|
14
|
-
test_data_dir = numba_cuda_dir /
|
14
|
+
test_data_dir = numba_cuda_dir / "tests" / "data"
|
15
15
|
|
16
16
|
|
17
17
|
class CUDATestCase(SerialMixin, TestCase):
|
@@ -55,6 +55,7 @@ class ContextResettingTestCase(CUDATestCase):
|
|
55
55
|
def tearDown(self):
|
56
56
|
super().tearDown()
|
57
57
|
from numba.cuda.cudadrv.devices import reset
|
58
|
+
|
58
59
|
reset()
|
59
60
|
|
60
61
|
|
@@ -89,26 +90,26 @@ def skip_unless_conda_cudatoolkit(reason):
|
|
89
90
|
|
90
91
|
def skip_if_external_memmgr(reason):
|
91
92
|
"""Skip test if an EMM Plugin is in use"""
|
92
|
-
return unittest.skipIf(config.CUDA_MEMORY_MANAGER !=
|
93
|
+
return unittest.skipIf(config.CUDA_MEMORY_MANAGER != "default", reason)
|
93
94
|
|
94
95
|
|
95
96
|
def skip_under_cuda_memcheck(reason):
|
96
|
-
return unittest.skipIf(os.environ.get(
|
97
|
+
return unittest.skipIf(os.environ.get("CUDA_MEMCHECK") is not None, reason)
|
97
98
|
|
98
99
|
|
99
100
|
def skip_without_nvdisasm(reason):
|
100
|
-
nvdisasm_path = shutil.which(
|
101
|
+
nvdisasm_path = shutil.which("nvdisasm")
|
101
102
|
return unittest.skipIf(nvdisasm_path is None, reason)
|
102
103
|
|
103
104
|
|
104
105
|
def skip_with_nvdisasm(reason):
|
105
|
-
nvdisasm_path = shutil.which(
|
106
|
+
nvdisasm_path = shutil.which("nvdisasm")
|
106
107
|
return unittest.skipIf(nvdisasm_path is not None, reason)
|
107
108
|
|
108
109
|
|
109
110
|
def skip_on_arm(reason):
|
110
111
|
cpu = platform.processor()
|
111
|
-
is_arm = cpu.startswith(
|
112
|
+
is_arm = cpu.startswith("arm") or cpu.startswith("aarch")
|
112
113
|
return unittest.skipIf(is_arm, reason)
|
113
114
|
|
114
115
|
|
@@ -116,25 +117,27 @@ def skip_if_cuda_includes_missing(fn):
|
|
116
117
|
# Skip when cuda.h is not available - generally this should indicate
|
117
118
|
# whether the CUDA includes are available or not
|
118
119
|
cuda_include_path = libs.get_cuda_include_dir()
|
119
|
-
cuda_h = os.path.join(cuda_include_path,
|
120
|
-
cuda_h_file =
|
121
|
-
reason =
|
120
|
+
cuda_h = os.path.join(cuda_include_path, "cuda.h")
|
121
|
+
cuda_h_file = os.path.exists(cuda_h) and os.path.isfile(cuda_h)
|
122
|
+
reason = "CUDA include dir not available on this system"
|
122
123
|
return unittest.skipUnless(cuda_h_file, reason)(fn)
|
123
124
|
|
124
125
|
|
125
126
|
def skip_if_curand_kernel_missing(fn):
|
126
127
|
cuda_include_path = libs.get_cuda_include_dir()
|
127
|
-
curand_kernel_h = os.path.join(cuda_include_path,
|
128
|
-
curand_kernel_h_file =
|
129
|
-
|
130
|
-
|
128
|
+
curand_kernel_h = os.path.join(cuda_include_path, "curand_kernel.h")
|
129
|
+
curand_kernel_h_file = os.path.exists(curand_kernel_h) and os.path.isfile(
|
130
|
+
curand_kernel_h
|
131
|
+
)
|
132
|
+
reason = "curand_kernel.h not available on this system"
|
131
133
|
return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
|
132
134
|
|
133
135
|
|
134
136
|
def skip_if_mvc_enabled(reason):
|
135
137
|
"""Skip a test if Minor Version Compatibility is enabled"""
|
136
|
-
return unittest.skipIf(
|
137
|
-
|
138
|
+
return unittest.skipIf(
|
139
|
+
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY, reason
|
140
|
+
)
|
138
141
|
|
139
142
|
|
140
143
|
def skip_if_mvc_libraries_unavailable(fn):
|
@@ -142,12 +145,14 @@ def skip_if_mvc_libraries_unavailable(fn):
|
|
142
145
|
try:
|
143
146
|
import cubinlinker # noqa: F401
|
144
147
|
import ptxcompiler # noqa: F401
|
148
|
+
|
145
149
|
libs_available = True
|
146
150
|
except ImportError:
|
147
151
|
pass
|
148
152
|
|
149
|
-
return unittest.skipUnless(
|
150
|
-
|
153
|
+
return unittest.skipUnless(
|
154
|
+
libs_available, "Requires cubinlinker and ptxcompiler"
|
155
|
+
)(fn)
|
151
156
|
|
152
157
|
|
153
158
|
def cc_X_or_above(major, minor):
|
@@ -189,7 +194,7 @@ def cudadevrt_missing():
|
|
189
194
|
if config.ENABLE_CUDASIM:
|
190
195
|
return False
|
191
196
|
try:
|
192
|
-
path = libs.get_cudalib(
|
197
|
+
path = libs.get_cudalib("cudadevrt", static=True)
|
193
198
|
libs.check_static_lib(path)
|
194
199
|
except FileNotFoundError:
|
195
200
|
return True
|
@@ -197,7 +202,7 @@ def cudadevrt_missing():
|
|
197
202
|
|
198
203
|
|
199
204
|
def skip_if_cudadevrt_missing(fn):
|
200
|
-
return unittest.skipIf(cudadevrt_missing(),
|
205
|
+
return unittest.skipIf(cudadevrt_missing(), "cudadevrt missing")(fn)
|
201
206
|
|
202
207
|
|
203
208
|
class ForeignArray(object):
|
@@ -19,18 +19,19 @@ def load_testsuite(loader, dir):
|
|
19
19
|
files = []
|
20
20
|
for f in os.listdir(dir):
|
21
21
|
path = join(dir, f)
|
22
|
-
if isfile(path) and fnmatch(f,
|
22
|
+
if isfile(path) and fnmatch(f, "test_*.py"):
|
23
23
|
files.append(f)
|
24
|
-
elif isfile(join(path,
|
25
|
-
suite.addTests(
|
26
|
-
|
24
|
+
elif isfile(join(path, "__init__.py")):
|
25
|
+
suite.addTests(
|
26
|
+
loader.discover(path, top_level_dir=top_level_dir)
|
27
|
+
)
|
27
28
|
for f in files:
|
28
29
|
# turn 'f' into a filename relative to the toplevel dir and
|
29
30
|
# translate it to a module name. This differs from the
|
30
31
|
# implementation in Numba, because the toplevel dir is the
|
31
32
|
# numba_cuda module location, not the numba one.
|
32
33
|
f = relpath(join(dir, f), top_level_dir)
|
33
|
-
f = splitext(normpath(f.replace(os.path.sep,
|
34
|
+
f = splitext(normpath(f.replace(os.path.sep, ".")))[0]
|
34
35
|
suite.addTests(loader.loadTestsFromName(f))
|
35
36
|
return suite
|
36
37
|
except Exception:
|
@@ -42,16 +43,17 @@ def load_tests(loader, tests, pattern):
|
|
42
43
|
suite = unittest.TestSuite()
|
43
44
|
this_dir = dirname(__file__)
|
44
45
|
ensure_supported_ccs_initialized()
|
45
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
46
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))
|
46
47
|
if cuda.is_available():
|
47
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
48
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
|
48
49
|
gpus = cuda.list_devices()
|
49
50
|
if gpus and gpus[0].compute_capability >= (2, 0):
|
50
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
51
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
52
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
53
|
-
suite.addTests(
|
54
|
-
|
51
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "cudadrv")))
|
52
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "cudapy")))
|
53
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "nrt")))
|
54
|
+
suite.addTests(
|
55
|
+
load_testsuite(loader, join(this_dir, "doc_examples"))
|
56
|
+
)
|
55
57
|
else:
|
56
58
|
print("skipped CUDA tests because GPU CC < 2.0")
|
57
59
|
else:
|
@@ -4,7 +4,6 @@ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
4
4
|
|
5
5
|
|
6
6
|
class TestArrayAttr(CUDATestCase):
|
7
|
-
|
8
7
|
def test_contigous_2d(self):
|
9
8
|
ary = np.arange(10)
|
10
9
|
cary = ary.reshape(2, 5)
|
@@ -44,7 +43,7 @@ class TestArrayAttr(CUDATestCase):
|
|
44
43
|
def test_ravel_1d(self):
|
45
44
|
ary = np.arange(60)
|
46
45
|
dary = cuda.to_device(ary)
|
47
|
-
for order in
|
46
|
+
for order in "CFA":
|
48
47
|
expect = ary.ravel(order=order)
|
49
48
|
dflat = dary.ravel(order=order)
|
50
49
|
flat = dflat.copy_to_host()
|
@@ -52,14 +51,14 @@ class TestArrayAttr(CUDATestCase):
|
|
52
51
|
self.assertEqual(flat.ndim, 1)
|
53
52
|
self.assertPreciseEqual(expect, flat)
|
54
53
|
|
55
|
-
@skip_on_cudasim(
|
54
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
56
55
|
def test_ravel_stride_1d(self):
|
57
56
|
ary = np.arange(60)
|
58
57
|
dary = cuda.to_device(ary)
|
59
58
|
# No-copy stride device array
|
60
59
|
darystride = dary[::2]
|
61
|
-
dary_data = dary.__cuda_array_interface__[
|
62
|
-
ddarystride_data = darystride.__cuda_array_interface__[
|
60
|
+
dary_data = dary.__cuda_array_interface__["data"][0]
|
61
|
+
ddarystride_data = darystride.__cuda_array_interface__["data"][0]
|
63
62
|
self.assertEqual(dary_data, ddarystride_data)
|
64
63
|
# Fail on ravel on non-contiguous array
|
65
64
|
with self.assertRaises(NotImplementedError):
|
@@ -69,7 +68,7 @@ class TestArrayAttr(CUDATestCase):
|
|
69
68
|
ary = np.arange(60)
|
70
69
|
reshaped = ary.reshape(2, 5, 2, 3)
|
71
70
|
|
72
|
-
expect = reshaped.ravel(order=
|
71
|
+
expect = reshaped.ravel(order="C")
|
73
72
|
dary = cuda.to_device(reshaped)
|
74
73
|
dflat = dary.ravel()
|
75
74
|
flat = dflat.copy_to_host()
|
@@ -78,7 +77,7 @@ class TestArrayAttr(CUDATestCase):
|
|
78
77
|
self.assertPreciseEqual(expect, flat)
|
79
78
|
|
80
79
|
# explicit order kwarg
|
81
|
-
for order in
|
80
|
+
for order in "CA":
|
82
81
|
expect = reshaped.ravel(order=order)
|
83
82
|
dary = cuda.to_device(reshaped)
|
84
83
|
dflat = dary.ravel(order=order)
|
@@ -87,15 +86,15 @@ class TestArrayAttr(CUDATestCase):
|
|
87
86
|
self.assertEqual(flat.ndim, 1)
|
88
87
|
self.assertPreciseEqual(expect, flat)
|
89
88
|
|
90
|
-
@skip_on_cudasim(
|
89
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
91
90
|
def test_ravel_stride_c(self):
|
92
91
|
ary = np.arange(60)
|
93
92
|
reshaped = ary.reshape(2, 5, 2, 3)
|
94
93
|
|
95
94
|
dary = cuda.to_device(reshaped)
|
96
95
|
darystride = dary[::2, ::2, ::2, ::2]
|
97
|
-
dary_data = dary.__cuda_array_interface__[
|
98
|
-
ddarystride_data = darystride.__cuda_array_interface__[
|
96
|
+
dary_data = dary.__cuda_array_interface__["data"][0]
|
97
|
+
ddarystride_data = darystride.__cuda_array_interface__["data"][0]
|
99
98
|
self.assertEqual(dary_data, ddarystride_data)
|
100
99
|
with self.assertRaises(NotImplementedError):
|
101
100
|
darystride.ravel()
|
@@ -103,7 +102,7 @@ class TestArrayAttr(CUDATestCase):
|
|
103
102
|
def test_ravel_f(self):
|
104
103
|
ary = np.arange(60)
|
105
104
|
reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
|
106
|
-
for order in
|
105
|
+
for order in "FA":
|
107
106
|
expect = reshaped.ravel(order=order)
|
108
107
|
dary = cuda.to_device(reshaped)
|
109
108
|
dflat = dary.ravel(order=order)
|
@@ -112,14 +111,14 @@ class TestArrayAttr(CUDATestCase):
|
|
112
111
|
self.assertEqual(flat.ndim, 1)
|
113
112
|
self.assertPreciseEqual(expect, flat)
|
114
113
|
|
115
|
-
@skip_on_cudasim(
|
114
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
116
115
|
def test_ravel_stride_f(self):
|
117
116
|
ary = np.arange(60)
|
118
117
|
reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
|
119
118
|
dary = cuda.to_device(reshaped)
|
120
119
|
darystride = dary[::2, ::2, ::2, ::2]
|
121
|
-
dary_data = dary.__cuda_array_interface__[
|
122
|
-
ddarystride_data = darystride.__cuda_array_interface__[
|
120
|
+
dary_data = dary.__cuda_array_interface__["data"][0]
|
121
|
+
ddarystride_data = darystride.__cuda_array_interface__["data"][0]
|
123
122
|
self.assertEqual(dary_data, ddarystride_data)
|
124
123
|
with self.assertRaises(NotImplementedError):
|
125
124
|
darystride.ravel()
|
@@ -134,12 +133,12 @@ class TestArrayAttr(CUDATestCase):
|
|
134
133
|
|
135
134
|
def test_reshape_f(self):
|
136
135
|
ary = np.arange(10)
|
137
|
-
expect = ary.reshape(2, 5, order=
|
136
|
+
expect = ary.reshape(2, 5, order="F")
|
138
137
|
dary = cuda.to_device(ary)
|
139
|
-
dary_reshaped = dary.reshape(2, 5, order=
|
138
|
+
dary_reshaped = dary.reshape(2, 5, order="F")
|
140
139
|
got = dary_reshaped.copy_to_host()
|
141
140
|
self.assertPreciseEqual(expect, got)
|
142
141
|
|
143
142
|
|
144
|
-
if __name__ ==
|
143
|
+
if __name__ == "__main__":
|
145
144
|
unittest.main()
|
@@ -27,7 +27,6 @@ class TestContextStack(CUDATestCase):
|
|
27
27
|
|
28
28
|
|
29
29
|
class TestContextAPI(CUDATestCase):
|
30
|
-
|
31
30
|
def tearDown(self):
|
32
31
|
super().tearDown()
|
33
32
|
cuda.close()
|
@@ -36,7 +35,7 @@ class TestContextAPI(CUDATestCase):
|
|
36
35
|
try:
|
37
36
|
mem = cuda.current_context().get_memory_info()
|
38
37
|
except NotImplementedError:
|
39
|
-
self.skipTest(
|
38
|
+
self.skipTest("EMM Plugin does not implement get_memory_info()")
|
40
39
|
|
41
40
|
self.assertIsInstance(mem.free, numbers.Number)
|
42
41
|
self.assertEqual(mem.free, mem[0])
|
@@ -47,7 +46,7 @@ class TestContextAPI(CUDATestCase):
|
|
47
46
|
self.assertLessEqual(mem.free, mem.total)
|
48
47
|
|
49
48
|
@unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus")
|
50
|
-
@skip_on_cudasim(
|
49
|
+
@skip_on_cudasim("CUDA HW required")
|
51
50
|
def test_forbidden_context_switch(self):
|
52
51
|
# Cannot switch context inside a `cuda.require_context`
|
53
52
|
@cuda.require_context
|
@@ -72,7 +71,7 @@ class TestContextAPI(CUDATestCase):
|
|
72
71
|
self.assertEqual(int(devid), 1)
|
73
72
|
|
74
73
|
|
75
|
-
@skip_on_cudasim(
|
74
|
+
@skip_on_cudasim("CUDA HW required")
|
76
75
|
class Test3rdPartyContext(CUDATestCase):
|
77
76
|
def tearDown(self):
|
78
77
|
super().tearDown()
|
@@ -118,8 +117,9 @@ class Test3rdPartyContext(CUDATestCase):
|
|
118
117
|
cuda.current_context()
|
119
118
|
except RuntimeError as e:
|
120
119
|
# Expecting an error about non-primary CUDA context
|
121
|
-
self.assertIn(
|
122
|
-
|
120
|
+
self.assertIn(
|
121
|
+
"Numba cannot operate on non-primary CUDA context ", str(e)
|
122
|
+
)
|
123
123
|
else:
|
124
124
|
self.fail("No RuntimeError raised")
|
125
125
|
finally:
|
@@ -141,5 +141,5 @@ class Test3rdPartyContext(CUDATestCase):
|
|
141
141
|
self.test_attached_primary(do)
|
142
142
|
|
143
143
|
|
144
|
-
if __name__ ==
|
144
|
+
if __name__ == "__main__":
|
145
145
|
unittest.main()
|