numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/target.py
CHANGED
@@ -35,19 +35,21 @@ class CUDATypingContext(typing.BaseContext):
|
|
35
35
|
def resolve_value_type(self, val):
|
36
36
|
# treat other dispatcher object as another device function
|
37
37
|
from numba.cuda.dispatcher import CUDADispatcher
|
38
|
-
|
39
|
-
|
38
|
+
|
39
|
+
if isinstance(val, Dispatcher) and not isinstance(val, CUDADispatcher):
|
40
40
|
try:
|
41
41
|
# use cached device function
|
42
42
|
val = val.__dispatcher
|
43
43
|
except AttributeError:
|
44
44
|
if not val._can_compile:
|
45
|
-
raise ValueError(
|
46
|
-
|
45
|
+
raise ValueError(
|
46
|
+
"using cpu function on device "
|
47
|
+
"but its compilation is disabled"
|
48
|
+
)
|
47
49
|
targetoptions = val.targetoptions.copy()
|
48
|
-
targetoptions[
|
49
|
-
targetoptions[
|
50
|
-
targetoptions[
|
50
|
+
targetoptions["device"] = True
|
51
|
+
targetoptions["debug"] = targetoptions.get("debug", False)
|
52
|
+
targetoptions["opt"] = targetoptions.get("opt", True)
|
51
53
|
disp = CUDADispatcher(val.py_func, targetoptions)
|
52
54
|
# cache the device function for future use and to avoid
|
53
55
|
# duplicated copy of the same function.
|
@@ -57,18 +59,19 @@ class CUDATypingContext(typing.BaseContext):
|
|
57
59
|
# continue with parent logic
|
58
60
|
return super(CUDATypingContext, self).resolve_value_type(val)
|
59
61
|
|
62
|
+
|
60
63
|
# -----------------------------------------------------------------------------
|
61
64
|
# Implementation
|
62
65
|
|
63
66
|
|
64
|
-
VALID_CHARS = re.compile(r
|
67
|
+
VALID_CHARS = re.compile(r"[^a-z0-9]", re.I)
|
65
68
|
|
66
69
|
|
67
70
|
class CUDATargetContext(BaseContext):
|
68
71
|
implement_powi_as_math_call = True
|
69
72
|
strict_alignment = True
|
70
73
|
|
71
|
-
def __init__(self, typingctx, target=
|
74
|
+
def __init__(self, typingctx, target="cuda"):
|
72
75
|
super().__init__(typingctx, target)
|
73
76
|
self.data_model_manager = cuda_data_manager.chain(
|
74
77
|
datamodel.default_manager
|
@@ -76,7 +79,7 @@ class CUDATargetContext(BaseContext):
|
|
76
79
|
|
77
80
|
@property
|
78
81
|
def enable_nrt(self):
|
79
|
-
return getattr(config,
|
82
|
+
return getattr(config, "CUDA_ENABLE_NRT", False)
|
80
83
|
|
81
84
|
@property
|
82
85
|
def DIBuilder(self):
|
@@ -98,18 +101,17 @@ class CUDATargetContext(BaseContext):
|
|
98
101
|
def load_additional_registries(self):
|
99
102
|
# side effect of import needed for numba.cpython.*, the builtins
|
100
103
|
# registry is updated at import time.
|
101
|
-
from numba.cpython import numbers, tupleobj, slicing
|
102
|
-
from numba.cpython import rangeobj, iterators, enumimpl
|
103
|
-
from numba.cpython import unicode, charseq
|
104
|
+
from numba.cpython import numbers, tupleobj, slicing # noqa: F401
|
105
|
+
from numba.cpython import rangeobj, iterators, enumimpl # noqa: F401
|
106
|
+
from numba.cpython import unicode, charseq # noqa: F401
|
104
107
|
from numba.cpython import cmathimpl
|
105
108
|
from numba.misc import cffiimpl
|
106
|
-
from numba.np import arrayobj
|
107
|
-
from numba.np import npdatetime
|
108
|
-
from . import
|
109
|
-
|
110
|
-
)
|
109
|
+
from numba.np import arrayobj # noqa: F401
|
110
|
+
from numba.np import npdatetime # noqa: F401
|
111
|
+
from . import cudaimpl, printimpl, libdeviceimpl, mathimpl, vector_types
|
112
|
+
|
111
113
|
# fix for #8940
|
112
|
-
from numba.np.unsafe import ndarray
|
114
|
+
from numba.np.unsafe import ndarray # noqa F401
|
113
115
|
|
114
116
|
self.install_registry(cudaimpl.registry)
|
115
117
|
self.install_registry(cffiimpl.registry)
|
@@ -136,10 +138,18 @@ class CUDATargetContext(BaseContext):
|
|
136
138
|
These include threadIdx, blockDim, etc.
|
137
139
|
"""
|
138
140
|
from numba import cuda
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
141
|
+
|
142
|
+
nonconsts = (
|
143
|
+
"threadIdx",
|
144
|
+
"blockDim",
|
145
|
+
"blockIdx",
|
146
|
+
"gridDim",
|
147
|
+
"laneid",
|
148
|
+
"warpsize",
|
149
|
+
)
|
150
|
+
nonconsts_with_mod = tuple(
|
151
|
+
[(types.Module(cuda), nc) for nc in nonconsts]
|
152
|
+
)
|
143
153
|
return nonconsts_with_mod
|
144
154
|
|
145
155
|
@cached_property
|
@@ -147,8 +157,9 @@ class CUDATargetContext(BaseContext):
|
|
147
157
|
return CUDACallConv(self)
|
148
158
|
|
149
159
|
def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
|
150
|
-
return itanium_mangler.mangle(
|
151
|
-
|
160
|
+
return itanium_mangler.mangle(
|
161
|
+
name, argtypes, abi_tags=abi_tags, uid=uid
|
162
|
+
)
|
152
163
|
|
153
164
|
def make_constant_array(self, builder, aryty, arr):
|
154
165
|
"""
|
@@ -160,15 +171,16 @@ class CUDATargetContext(BaseContext):
|
|
160
171
|
|
161
172
|
constvals = [
|
162
173
|
self.get_constant(types.byte, i)
|
163
|
-
for i in iter(arr.tobytes(order=
|
174
|
+
for i in iter(arr.tobytes(order="A"))
|
164
175
|
]
|
165
176
|
constaryty = ir.ArrayType(ir.IntType(8), len(constvals))
|
166
177
|
constary = ir.Constant(constaryty, constvals)
|
167
178
|
|
168
179
|
addrspace = nvvm.ADDRSPACE_CONSTANT
|
169
|
-
gv = cgutils.add_global_variable(
|
170
|
-
|
171
|
-
|
180
|
+
gv = cgutils.add_global_variable(
|
181
|
+
lmod, constary.type, "_cudapy_cmem", addrspace=addrspace
|
182
|
+
)
|
183
|
+
gv.linkage = "internal"
|
172
184
|
gv.global_constant = True
|
173
185
|
gv.initializer = constary
|
174
186
|
|
@@ -179,17 +191,21 @@ class CUDATargetContext(BaseContext):
|
|
179
191
|
|
180
192
|
# Convert to generic address-space
|
181
193
|
ptrty = ir.PointerType(ir.IntType(8))
|
182
|
-
genptr = builder.addrspacecast(gv, ptrty,
|
194
|
+
genptr = builder.addrspacecast(gv, ptrty, "generic")
|
183
195
|
|
184
196
|
# Create array object
|
185
197
|
ary = self.make_array(aryty)(self, builder)
|
186
198
|
kshape = [self.get_constant(types.intp, s) for s in arr.shape]
|
187
199
|
kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
|
188
|
-
self.populate_array(
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
200
|
+
self.populate_array(
|
201
|
+
ary,
|
202
|
+
data=builder.bitcast(genptr, ary.data.type),
|
203
|
+
shape=kshape,
|
204
|
+
strides=kstrides,
|
205
|
+
itemsize=ary.itemsize,
|
206
|
+
parent=ary.parent,
|
207
|
+
meminfo=None,
|
208
|
+
)
|
193
209
|
|
194
210
|
return ary._getvalue()
|
195
211
|
|
@@ -199,15 +215,17 @@ class CUDATargetContext(BaseContext):
|
|
199
215
|
addrspace.
|
200
216
|
"""
|
201
217
|
text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00")
|
202
|
-
name =
|
203
|
-
|
218
|
+
name = "$".join(
|
219
|
+
["__conststring__", itanium_mangler.mangle_identifier(string)]
|
220
|
+
)
|
204
221
|
# Try to reuse existing global
|
205
222
|
gv = mod.globals.get(name)
|
206
223
|
if gv is None:
|
207
224
|
# Not defined yet
|
208
|
-
gv = cgutils.add_global_variable(
|
209
|
-
|
210
|
-
|
225
|
+
gv = cgutils.add_global_variable(
|
226
|
+
mod, text.type, name, addrspace=nvvm.ADDRSPACE_CONSTANT
|
227
|
+
)
|
228
|
+
gv.linkage = "internal"
|
211
229
|
gv.global_constant = True
|
212
230
|
gv.initializer = text
|
213
231
|
|
@@ -225,11 +243,10 @@ class CUDATargetContext(BaseContext):
|
|
225
243
|
lmod = builder.module
|
226
244
|
gv = self.insert_const_string(lmod, string)
|
227
245
|
charptrty = ir.PointerType(ir.IntType(8))
|
228
|
-
return builder.addrspacecast(gv, charptrty,
|
246
|
+
return builder.addrspacecast(gv, charptrty, "generic")
|
229
247
|
|
230
248
|
def optimize_function(self, func):
|
231
|
-
"""Run O1 function passes
|
232
|
-
"""
|
249
|
+
"""Run O1 function passes"""
|
233
250
|
pass
|
234
251
|
## XXX skipped for now
|
235
252
|
# fpm = lp.FunctionPassManager.new(func.module)
|
@@ -266,8 +283,9 @@ class CUDACABICallConv(BaseCallConv):
|
|
266
283
|
def return_value(self, builder, retval):
|
267
284
|
return builder.ret(retval)
|
268
285
|
|
269
|
-
def return_user_exc(
|
270
|
-
|
286
|
+
def return_user_exc(
|
287
|
+
self, builder, exc, exc_args=None, loc=None, func_name=None
|
288
|
+
):
|
271
289
|
msg = "Python exceptions are unsupported in the CUDA C/C++ ABI"
|
272
290
|
raise NotImplementedError(msg)
|
273
291
|
|
@@ -290,8 +308,7 @@ class CUDACABICallConv(BaseCallConv):
|
|
290
308
|
"""
|
291
309
|
assert not noalias
|
292
310
|
arginfo = self._get_arg_packer(fe_argtypes)
|
293
|
-
arginfo.assign_names(self.get_arguments(fn),
|
294
|
-
['arg.' + a for a in args])
|
311
|
+
arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args])
|
295
312
|
|
296
313
|
def get_arguments(self, func):
|
297
314
|
"""
|
numba_cuda/numba/cuda/testing.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
11
11
|
import unittest
|
12
12
|
|
13
13
|
numba_cuda_dir = Path(__file__).parent
|
14
|
-
test_data_dir = numba_cuda_dir /
|
14
|
+
test_data_dir = numba_cuda_dir / "tests" / "data"
|
15
15
|
|
16
16
|
|
17
17
|
class CUDATestCase(SerialMixin, TestCase):
|
@@ -55,6 +55,7 @@ class ContextResettingTestCase(CUDATestCase):
|
|
55
55
|
def tearDown(self):
|
56
56
|
super().tearDown()
|
57
57
|
from numba.cuda.cudadrv.devices import reset
|
58
|
+
|
58
59
|
reset()
|
59
60
|
|
60
61
|
|
@@ -89,26 +90,26 @@ def skip_unless_conda_cudatoolkit(reason):
|
|
89
90
|
|
90
91
|
def skip_if_external_memmgr(reason):
|
91
92
|
"""Skip test if an EMM Plugin is in use"""
|
92
|
-
return unittest.skipIf(config.CUDA_MEMORY_MANAGER !=
|
93
|
+
return unittest.skipIf(config.CUDA_MEMORY_MANAGER != "default", reason)
|
93
94
|
|
94
95
|
|
95
96
|
def skip_under_cuda_memcheck(reason):
|
96
|
-
return unittest.skipIf(os.environ.get(
|
97
|
+
return unittest.skipIf(os.environ.get("CUDA_MEMCHECK") is not None, reason)
|
97
98
|
|
98
99
|
|
99
100
|
def skip_without_nvdisasm(reason):
|
100
|
-
nvdisasm_path = shutil.which(
|
101
|
+
nvdisasm_path = shutil.which("nvdisasm")
|
101
102
|
return unittest.skipIf(nvdisasm_path is None, reason)
|
102
103
|
|
103
104
|
|
104
105
|
def skip_with_nvdisasm(reason):
|
105
|
-
nvdisasm_path = shutil.which(
|
106
|
+
nvdisasm_path = shutil.which("nvdisasm")
|
106
107
|
return unittest.skipIf(nvdisasm_path is not None, reason)
|
107
108
|
|
108
109
|
|
109
110
|
def skip_on_arm(reason):
|
110
111
|
cpu = platform.processor()
|
111
|
-
is_arm = cpu.startswith(
|
112
|
+
is_arm = cpu.startswith("arm") or cpu.startswith("aarch")
|
112
113
|
return unittest.skipIf(is_arm, reason)
|
113
114
|
|
114
115
|
|
@@ -116,25 +117,27 @@ def skip_if_cuda_includes_missing(fn):
|
|
116
117
|
# Skip when cuda.h is not available - generally this should indicate
|
117
118
|
# whether the CUDA includes are available or not
|
118
119
|
cuda_include_path = libs.get_cuda_include_dir()
|
119
|
-
cuda_h = os.path.join(cuda_include_path,
|
120
|
-
cuda_h_file =
|
121
|
-
reason =
|
120
|
+
cuda_h = os.path.join(cuda_include_path, "cuda.h")
|
121
|
+
cuda_h_file = os.path.exists(cuda_h) and os.path.isfile(cuda_h)
|
122
|
+
reason = "CUDA include dir not available on this system"
|
122
123
|
return unittest.skipUnless(cuda_h_file, reason)(fn)
|
123
124
|
|
124
125
|
|
125
126
|
def skip_if_curand_kernel_missing(fn):
|
126
127
|
cuda_include_path = libs.get_cuda_include_dir()
|
127
|
-
curand_kernel_h = os.path.join(cuda_include_path,
|
128
|
-
curand_kernel_h_file =
|
129
|
-
|
130
|
-
|
128
|
+
curand_kernel_h = os.path.join(cuda_include_path, "curand_kernel.h")
|
129
|
+
curand_kernel_h_file = os.path.exists(curand_kernel_h) and os.path.isfile(
|
130
|
+
curand_kernel_h
|
131
|
+
)
|
132
|
+
reason = "curand_kernel.h not available on this system"
|
131
133
|
return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
|
132
134
|
|
133
135
|
|
134
136
|
def skip_if_mvc_enabled(reason):
|
135
137
|
"""Skip a test if Minor Version Compatibility is enabled"""
|
136
|
-
return unittest.skipIf(
|
137
|
-
|
138
|
+
return unittest.skipIf(
|
139
|
+
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY, reason
|
140
|
+
)
|
138
141
|
|
139
142
|
|
140
143
|
def skip_if_mvc_libraries_unavailable(fn):
|
@@ -142,12 +145,14 @@ def skip_if_mvc_libraries_unavailable(fn):
|
|
142
145
|
try:
|
143
146
|
import cubinlinker # noqa: F401
|
144
147
|
import ptxcompiler # noqa: F401
|
148
|
+
|
145
149
|
libs_available = True
|
146
150
|
except ImportError:
|
147
151
|
pass
|
148
152
|
|
149
|
-
return unittest.skipUnless(
|
150
|
-
|
153
|
+
return unittest.skipUnless(
|
154
|
+
libs_available, "Requires cubinlinker and ptxcompiler"
|
155
|
+
)(fn)
|
151
156
|
|
152
157
|
|
153
158
|
def cc_X_or_above(major, minor):
|
@@ -189,7 +194,7 @@ def cudadevrt_missing():
|
|
189
194
|
if config.ENABLE_CUDASIM:
|
190
195
|
return False
|
191
196
|
try:
|
192
|
-
path = libs.get_cudalib(
|
197
|
+
path = libs.get_cudalib("cudadevrt", static=True)
|
193
198
|
libs.check_static_lib(path)
|
194
199
|
except FileNotFoundError:
|
195
200
|
return True
|
@@ -197,7 +202,7 @@ def cudadevrt_missing():
|
|
197
202
|
|
198
203
|
|
199
204
|
def skip_if_cudadevrt_missing(fn):
|
200
|
-
return unittest.skipIf(cudadevrt_missing(),
|
205
|
+
return unittest.skipIf(cudadevrt_missing(), "cudadevrt missing")(fn)
|
201
206
|
|
202
207
|
|
203
208
|
class ForeignArray(object):
|
@@ -19,18 +19,19 @@ def load_testsuite(loader, dir):
|
|
19
19
|
files = []
|
20
20
|
for f in os.listdir(dir):
|
21
21
|
path = join(dir, f)
|
22
|
-
if isfile(path) and fnmatch(f,
|
22
|
+
if isfile(path) and fnmatch(f, "test_*.py"):
|
23
23
|
files.append(f)
|
24
|
-
elif isfile(join(path,
|
25
|
-
suite.addTests(
|
26
|
-
|
24
|
+
elif isfile(join(path, "__init__.py")):
|
25
|
+
suite.addTests(
|
26
|
+
loader.discover(path, top_level_dir=top_level_dir)
|
27
|
+
)
|
27
28
|
for f in files:
|
28
29
|
# turn 'f' into a filename relative to the toplevel dir and
|
29
30
|
# translate it to a module name. This differs from the
|
30
31
|
# implementation in Numba, because the toplevel dir is the
|
31
32
|
# numba_cuda module location, not the numba one.
|
32
33
|
f = relpath(join(dir, f), top_level_dir)
|
33
|
-
f = splitext(normpath(f.replace(os.path.sep,
|
34
|
+
f = splitext(normpath(f.replace(os.path.sep, ".")))[0]
|
34
35
|
suite.addTests(loader.loadTestsFromName(f))
|
35
36
|
return suite
|
36
37
|
except Exception:
|
@@ -42,16 +43,17 @@ def load_tests(loader, tests, pattern):
|
|
42
43
|
suite = unittest.TestSuite()
|
43
44
|
this_dir = dirname(__file__)
|
44
45
|
ensure_supported_ccs_initialized()
|
45
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
46
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))
|
46
47
|
if cuda.is_available():
|
47
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
48
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
|
48
49
|
gpus = cuda.list_devices()
|
49
50
|
if gpus and gpus[0].compute_capability >= (2, 0):
|
50
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
51
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
52
|
-
suite.addTests(load_testsuite(loader, join(this_dir,
|
53
|
-
suite.addTests(
|
54
|
-
|
51
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "cudadrv")))
|
52
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "cudapy")))
|
53
|
+
suite.addTests(load_testsuite(loader, join(this_dir, "nrt")))
|
54
|
+
suite.addTests(
|
55
|
+
load_testsuite(loader, join(this_dir, "doc_examples"))
|
56
|
+
)
|
55
57
|
else:
|
56
58
|
print("skipped CUDA tests because GPU CC < 2.0")
|
57
59
|
else:
|
@@ -4,7 +4,6 @@ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
4
4
|
|
5
5
|
|
6
6
|
class TestArrayAttr(CUDATestCase):
|
7
|
-
|
8
7
|
def test_contigous_2d(self):
|
9
8
|
ary = np.arange(10)
|
10
9
|
cary = ary.reshape(2, 5)
|
@@ -44,7 +43,7 @@ class TestArrayAttr(CUDATestCase):
|
|
44
43
|
def test_ravel_1d(self):
|
45
44
|
ary = np.arange(60)
|
46
45
|
dary = cuda.to_device(ary)
|
47
|
-
for order in
|
46
|
+
for order in "CFA":
|
48
47
|
expect = ary.ravel(order=order)
|
49
48
|
dflat = dary.ravel(order=order)
|
50
49
|
flat = dflat.copy_to_host()
|
@@ -52,14 +51,14 @@ class TestArrayAttr(CUDATestCase):
|
|
52
51
|
self.assertEqual(flat.ndim, 1)
|
53
52
|
self.assertPreciseEqual(expect, flat)
|
54
53
|
|
55
|
-
@skip_on_cudasim(
|
54
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
56
55
|
def test_ravel_stride_1d(self):
|
57
56
|
ary = np.arange(60)
|
58
57
|
dary = cuda.to_device(ary)
|
59
58
|
# No-copy stride device array
|
60
59
|
darystride = dary[::2]
|
61
|
-
dary_data = dary.__cuda_array_interface__[
|
62
|
-
ddarystride_data = darystride.__cuda_array_interface__[
|
60
|
+
dary_data = dary.__cuda_array_interface__["data"][0]
|
61
|
+
ddarystride_data = darystride.__cuda_array_interface__["data"][0]
|
63
62
|
self.assertEqual(dary_data, ddarystride_data)
|
64
63
|
# Fail on ravel on non-contiguous array
|
65
64
|
with self.assertRaises(NotImplementedError):
|
@@ -69,7 +68,7 @@ class TestArrayAttr(CUDATestCase):
|
|
69
68
|
ary = np.arange(60)
|
70
69
|
reshaped = ary.reshape(2, 5, 2, 3)
|
71
70
|
|
72
|
-
expect = reshaped.ravel(order=
|
71
|
+
expect = reshaped.ravel(order="C")
|
73
72
|
dary = cuda.to_device(reshaped)
|
74
73
|
dflat = dary.ravel()
|
75
74
|
flat = dflat.copy_to_host()
|
@@ -78,7 +77,7 @@ class TestArrayAttr(CUDATestCase):
|
|
78
77
|
self.assertPreciseEqual(expect, flat)
|
79
78
|
|
80
79
|
# explicit order kwarg
|
81
|
-
for order in
|
80
|
+
for order in "CA":
|
82
81
|
expect = reshaped.ravel(order=order)
|
83
82
|
dary = cuda.to_device(reshaped)
|
84
83
|
dflat = dary.ravel(order=order)
|
@@ -87,15 +86,15 @@ class TestArrayAttr(CUDATestCase):
|
|
87
86
|
self.assertEqual(flat.ndim, 1)
|
88
87
|
self.assertPreciseEqual(expect, flat)
|
89
88
|
|
90
|
-
@skip_on_cudasim(
|
89
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
91
90
|
def test_ravel_stride_c(self):
|
92
91
|
ary = np.arange(60)
|
93
92
|
reshaped = ary.reshape(2, 5, 2, 3)
|
94
93
|
|
95
94
|
dary = cuda.to_device(reshaped)
|
96
95
|
darystride = dary[::2, ::2, ::2, ::2]
|
97
|
-
dary_data = dary.__cuda_array_interface__[
|
98
|
-
ddarystride_data = darystride.__cuda_array_interface__[
|
96
|
+
dary_data = dary.__cuda_array_interface__["data"][0]
|
97
|
+
ddarystride_data = darystride.__cuda_array_interface__["data"][0]
|
99
98
|
self.assertEqual(dary_data, ddarystride_data)
|
100
99
|
with self.assertRaises(NotImplementedError):
|
101
100
|
darystride.ravel()
|
@@ -103,7 +102,7 @@ class TestArrayAttr(CUDATestCase):
|
|
103
102
|
def test_ravel_f(self):
|
104
103
|
ary = np.arange(60)
|
105
104
|
reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
|
106
|
-
for order in
|
105
|
+
for order in "FA":
|
107
106
|
expect = reshaped.ravel(order=order)
|
108
107
|
dary = cuda.to_device(reshaped)
|
109
108
|
dflat = dary.ravel(order=order)
|
@@ -112,14 +111,14 @@ class TestArrayAttr(CUDATestCase):
|
|
112
111
|
self.assertEqual(flat.ndim, 1)
|
113
112
|
self.assertPreciseEqual(expect, flat)
|
114
113
|
|
115
|
-
@skip_on_cudasim(
|
114
|
+
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
|
116
115
|
def test_ravel_stride_f(self):
|
117
116
|
ary = np.arange(60)
|
118
117
|
reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
|
119
118
|
dary = cuda.to_device(reshaped)
|
120
119
|
darystride = dary[::2, ::2, ::2, ::2]
|
121
|
-
dary_data = dary.__cuda_array_interface__[
|
122
|
-
ddarystride_data = darystride.__cuda_array_interface__[
|
120
|
+
dary_data = dary.__cuda_array_interface__["data"][0]
|
121
|
+
ddarystride_data = darystride.__cuda_array_interface__["data"][0]
|
123
122
|
self.assertEqual(dary_data, ddarystride_data)
|
124
123
|
with self.assertRaises(NotImplementedError):
|
125
124
|
darystride.ravel()
|
@@ -134,12 +133,12 @@ class TestArrayAttr(CUDATestCase):
|
|
134
133
|
|
135
134
|
def test_reshape_f(self):
|
136
135
|
ary = np.arange(10)
|
137
|
-
expect = ary.reshape(2, 5, order=
|
136
|
+
expect = ary.reshape(2, 5, order="F")
|
138
137
|
dary = cuda.to_device(ary)
|
139
|
-
dary_reshaped = dary.reshape(2, 5, order=
|
138
|
+
dary_reshaped = dary.reshape(2, 5, order="F")
|
140
139
|
got = dary_reshaped.copy_to_host()
|
141
140
|
self.assertPreciseEqual(expect, got)
|
142
141
|
|
143
142
|
|
144
|
-
if __name__ ==
|
143
|
+
if __name__ == "__main__":
|
145
144
|
unittest.main()
|
@@ -27,7 +27,6 @@ class TestContextStack(CUDATestCase):
|
|
27
27
|
|
28
28
|
|
29
29
|
class TestContextAPI(CUDATestCase):
|
30
|
-
|
31
30
|
def tearDown(self):
|
32
31
|
super().tearDown()
|
33
32
|
cuda.close()
|
@@ -36,7 +35,7 @@ class TestContextAPI(CUDATestCase):
|
|
36
35
|
try:
|
37
36
|
mem = cuda.current_context().get_memory_info()
|
38
37
|
except NotImplementedError:
|
39
|
-
self.skipTest(
|
38
|
+
self.skipTest("EMM Plugin does not implement get_memory_info()")
|
40
39
|
|
41
40
|
self.assertIsInstance(mem.free, numbers.Number)
|
42
41
|
self.assertEqual(mem.free, mem[0])
|
@@ -47,7 +46,7 @@ class TestContextAPI(CUDATestCase):
|
|
47
46
|
self.assertLessEqual(mem.free, mem.total)
|
48
47
|
|
49
48
|
@unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus")
|
50
|
-
@skip_on_cudasim(
|
49
|
+
@skip_on_cudasim("CUDA HW required")
|
51
50
|
def test_forbidden_context_switch(self):
|
52
51
|
# Cannot switch context inside a `cuda.require_context`
|
53
52
|
@cuda.require_context
|
@@ -72,7 +71,7 @@ class TestContextAPI(CUDATestCase):
|
|
72
71
|
self.assertEqual(int(devid), 1)
|
73
72
|
|
74
73
|
|
75
|
-
@skip_on_cudasim(
|
74
|
+
@skip_on_cudasim("CUDA HW required")
|
76
75
|
class Test3rdPartyContext(CUDATestCase):
|
77
76
|
def tearDown(self):
|
78
77
|
super().tearDown()
|
@@ -118,8 +117,9 @@ class Test3rdPartyContext(CUDATestCase):
|
|
118
117
|
cuda.current_context()
|
119
118
|
except RuntimeError as e:
|
120
119
|
# Expecting an error about non-primary CUDA context
|
121
|
-
self.assertIn(
|
122
|
-
|
120
|
+
self.assertIn(
|
121
|
+
"Numba cannot operate on non-primary CUDA context ", str(e)
|
122
|
+
)
|
123
123
|
else:
|
124
124
|
self.fail("No RuntimeError raised")
|
125
125
|
finally:
|
@@ -141,5 +141,5 @@ class Test3rdPartyContext(CUDATestCase):
|
|
141
141
|
self.test_attached_primary(do)
|
142
142
|
|
143
143
|
|
144
|
-
if __name__ ==
|
144
|
+
if __name__ == "__main__":
|
145
145
|
unittest.main()
|