numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
Contains CUDA API functions
|
3
|
-
|
3
|
+
"""
|
4
4
|
|
5
5
|
# Imports here bring together parts of the API from other modules, so some of
|
6
6
|
# them appear unused.
|
@@ -15,7 +15,7 @@ from ..args import In, Out, InOut # noqa: F401
|
|
15
15
|
|
16
16
|
|
17
17
|
def select_device(dev=0):
|
18
|
-
assert dev == 0,
|
18
|
+
assert dev == 0, "Only a single device supported by the simulator"
|
19
19
|
|
20
20
|
|
21
21
|
def is_float16_supported():
|
@@ -23,10 +23,11 @@ def is_float16_supported():
|
|
23
23
|
|
24
24
|
|
25
25
|
class stream(object):
|
26
|
-
|
26
|
+
"""
|
27
27
|
The stream API is supported in the simulator - however, all execution
|
28
28
|
occurs synchronously, so synchronization requires no operation.
|
29
|
-
|
29
|
+
"""
|
30
|
+
|
30
31
|
@contextmanager
|
31
32
|
def auto_synchronize(self):
|
32
33
|
yield
|
@@ -62,9 +63,9 @@ def declare_device(*args, **kwargs):
|
|
62
63
|
|
63
64
|
|
64
65
|
def detect():
|
65
|
-
print(
|
66
|
-
print(
|
67
|
-
print(
|
66
|
+
print("Found 1 CUDA devices")
|
67
|
+
print("id %d %20s %40s" % (0, "SIMULATOR", "[SUPPORTED]"))
|
68
|
+
print("%40s: 5.0" % "compute capability")
|
68
69
|
|
69
70
|
|
70
71
|
def list_devices():
|
@@ -73,11 +74,13 @@ def list_devices():
|
|
73
74
|
|
74
75
|
# Events
|
75
76
|
|
77
|
+
|
76
78
|
class Event(object):
|
77
|
-
|
79
|
+
"""
|
78
80
|
The simulator supports the event API, but they do not record timing info,
|
79
81
|
and all simulation is synchronous. Execution time is not recorded.
|
80
|
-
|
82
|
+
"""
|
83
|
+
|
81
84
|
def record(self, stream=0):
|
82
85
|
pass
|
83
86
|
|
@@ -88,35 +91,48 @@ class Event(object):
|
|
88
91
|
pass
|
89
92
|
|
90
93
|
def elapsed_time(self, event):
|
91
|
-
warn(
|
94
|
+
warn("Simulator timings are bogus")
|
92
95
|
return 0.0
|
93
96
|
|
94
97
|
|
95
98
|
event = Event
|
96
99
|
|
97
100
|
|
98
|
-
def jit(
|
99
|
-
|
100
|
-
|
101
|
-
|
101
|
+
def jit(
|
102
|
+
func_or_sig=None,
|
103
|
+
device=False,
|
104
|
+
debug=None,
|
105
|
+
argtypes=None,
|
106
|
+
inline=False,
|
107
|
+
restype=None,
|
108
|
+
fastmath=False,
|
109
|
+
link=None,
|
110
|
+
boundscheck=None,
|
111
|
+
opt=None,
|
112
|
+
cache=None,
|
113
|
+
):
|
102
114
|
# Here for API compatibility
|
103
115
|
if boundscheck:
|
104
116
|
raise NotImplementedError("bounds checking is not supported for CUDA")
|
105
117
|
|
106
118
|
if link is not None:
|
107
|
-
raise NotImplementedError(
|
119
|
+
raise NotImplementedError("Cannot link PTX in the simulator")
|
108
120
|
|
109
121
|
debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
|
110
122
|
|
111
123
|
# Check for first argument specifying types - in that case the
|
112
124
|
# decorator is not being passed a function
|
113
|
-
if (
|
114
|
-
|
125
|
+
if (
|
126
|
+
func_or_sig is None
|
127
|
+
or is_signature(func_or_sig)
|
128
|
+
or isinstance(func_or_sig, list)
|
129
|
+
):
|
130
|
+
|
115
131
|
def jitwrapper(fn):
|
116
|
-
return FakeCUDAKernel(
|
117
|
-
|
118
|
-
|
119
|
-
|
132
|
+
return FakeCUDAKernel(
|
133
|
+
fn, device=device, fastmath=fastmath, debug=debug
|
134
|
+
)
|
135
|
+
|
120
136
|
return jitwrapper
|
121
137
|
return FakeCUDAKernel(func_or_sig, device=device, debug=debug)
|
122
138
|
|
@@ -1,7 +1,8 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
The Device Array API is not implemented in the simulator. This module provides
|
3
3
|
stubs to allow tests to import correctly.
|
4
|
-
|
4
|
+
"""
|
5
|
+
|
5
6
|
from contextlib import contextmanager
|
6
7
|
from numba.np.numpy_support import numpy_version
|
7
8
|
|
@@ -12,37 +13,39 @@ DeviceRecord = None
|
|
12
13
|
from_record_like = None
|
13
14
|
|
14
15
|
|
15
|
-
errmsg_contiguous_buffer = (
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
errmsg_contiguous_buffer = (
|
17
|
+
"Array contains non-contiguous buffer and cannot "
|
18
|
+
"be transferred as a single memory region. Please "
|
19
|
+
"ensure contiguous buffer with numpy "
|
20
|
+
".ascontiguousarray()"
|
21
|
+
)
|
19
22
|
|
20
23
|
|
21
24
|
class FakeShape(tuple):
|
22
|
-
|
25
|
+
"""
|
23
26
|
The FakeShape class is used to provide a shape which does not allow negative
|
24
27
|
indexing, similar to the shape in CUDA Python. (Numpy shape arrays allow
|
25
28
|
negative indexing)
|
26
|
-
|
29
|
+
"""
|
27
30
|
|
28
31
|
def __getitem__(self, k):
|
29
32
|
if isinstance(k, int) and k < 0:
|
30
|
-
raise IndexError(
|
33
|
+
raise IndexError("tuple index out of range")
|
31
34
|
return super(FakeShape, self).__getitem__(k)
|
32
35
|
|
33
36
|
|
34
37
|
class FakeWithinKernelCUDAArray(object):
|
35
|
-
|
38
|
+
"""
|
36
39
|
Created to emulate the behavior of arrays within kernels, where either
|
37
40
|
array.item or array['item'] is valid (that is, give all structured
|
38
41
|
arrays `numpy.recarray`-like semantics). This behaviour does not follow
|
39
42
|
the semantics of Python and NumPy with non-jitted code, and will be
|
40
43
|
deprecated and removed.
|
41
|
-
|
44
|
+
"""
|
42
45
|
|
43
46
|
def __init__(self, item):
|
44
47
|
assert isinstance(item, FakeCUDAArray)
|
45
|
-
self.__dict__[
|
48
|
+
self.__dict__["_item"] = item
|
46
49
|
|
47
50
|
def __wrap_if_fake(self, item):
|
48
51
|
if isinstance(item, FakeCUDAArray):
|
@@ -84,18 +87,18 @@ class FakeWithinKernelCUDAArray(object):
|
|
84
87
|
|
85
88
|
return obj
|
86
89
|
|
87
|
-
out = kwargs.get(
|
90
|
+
out = kwargs.get("out")
|
88
91
|
if out:
|
89
|
-
kwargs[
|
92
|
+
kwargs["out"] = tuple(convert_fakes(o) for o in out)
|
90
93
|
args = tuple(convert_fakes(a) for a in args)
|
91
94
|
return call(*args, **kwargs)
|
92
95
|
|
93
96
|
|
94
97
|
class FakeCUDAArray(object):
|
95
|
-
|
98
|
+
"""
|
96
99
|
Implements the interface of a DeviceArray/DeviceRecord, but mostly just
|
97
100
|
wraps a NumPy array.
|
98
|
-
|
101
|
+
"""
|
99
102
|
|
100
103
|
__cuda_ndarray__ = True # There must be gpu_data attribute
|
101
104
|
|
@@ -149,13 +152,13 @@ class FakeCUDAArray(object):
|
|
149
152
|
return ary
|
150
153
|
|
151
154
|
def copy_to_device(self, ary, stream=0):
|
152
|
-
|
155
|
+
"""
|
153
156
|
Copy from the provided array into this array.
|
154
157
|
|
155
158
|
This may be less forgiving than the CUDA Python implementation, which
|
156
159
|
will copy data up to the length of the smallest of the two arrays,
|
157
160
|
whereas this expects the size of the arrays to be equal.
|
158
|
-
|
161
|
+
"""
|
159
162
|
sentry_contiguous(self)
|
160
163
|
self_core, ary_core = array_core(self), array_core(ary)
|
161
164
|
if isinstance(ary, FakeCUDAArray):
|
@@ -164,9 +167,10 @@ class FakeCUDAArray(object):
|
|
164
167
|
else:
|
165
168
|
ary_core = np.array(
|
166
169
|
ary_core,
|
167
|
-
order=
|
170
|
+
order="C" if self_core.flags["C_CONTIGUOUS"] else "F",
|
168
171
|
subok=True,
|
169
|
-
copy=False if numpy_version < (2, 0) else None
|
172
|
+
copy=False if numpy_version < (2, 0) else None,
|
173
|
+
)
|
170
174
|
check_array_compatibility(self_core, ary_core)
|
171
175
|
np.copyto(self_core._ary, ary_core)
|
172
176
|
|
@@ -237,7 +241,7 @@ class FakeCUDAArray(object):
|
|
237
241
|
return FakeCUDAArray(self._ary % other)
|
238
242
|
|
239
243
|
def __pow__(self, other):
|
240
|
-
return FakeCUDAArray(self._ary
|
244
|
+
return FakeCUDAArray(self._ary**other)
|
241
245
|
|
242
246
|
def split(self, section, stream=0):
|
243
247
|
return [
|
@@ -282,30 +286,33 @@ def is_contiguous(ary):
|
|
282
286
|
|
283
287
|
def sentry_contiguous(ary):
|
284
288
|
core = array_core(ary)
|
285
|
-
if not core.flags[
|
289
|
+
if not core.flags["C_CONTIGUOUS"] and not core.flags["F_CONTIGUOUS"]:
|
286
290
|
raise ValueError(errmsg_contiguous_buffer)
|
287
291
|
|
288
292
|
|
289
293
|
def check_array_compatibility(ary1, ary2):
|
290
294
|
ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze()
|
291
295
|
if ary1.dtype != ary2.dtype:
|
292
|
-
raise TypeError(
|
293
|
-
|
296
|
+
raise TypeError(
|
297
|
+
"incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
|
298
|
+
)
|
294
299
|
if ary1sq.shape != ary2sq.shape:
|
295
|
-
raise ValueError(
|
296
|
-
|
300
|
+
raise ValueError(
|
301
|
+
"incompatible shape: %s vs. %s" % (ary1.shape, ary2.shape)
|
302
|
+
)
|
297
303
|
if ary1sq.strides != ary2sq.strides:
|
298
|
-
raise ValueError(
|
299
|
-
|
304
|
+
raise ValueError(
|
305
|
+
"incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
|
306
|
+
)
|
300
307
|
|
301
308
|
|
302
309
|
def to_device(ary, stream=0, copy=True, to=None):
|
303
|
-
ary = np.array(
|
304
|
-
|
305
|
-
|
310
|
+
ary = np.array(
|
311
|
+
ary, copy=False if numpy_version < (2, 0) else None, subok=True
|
312
|
+
)
|
306
313
|
sentry_contiguous(ary)
|
307
314
|
if to is None:
|
308
|
-
buffer_dtype = np.int64 if ary.dtype.char in
|
315
|
+
buffer_dtype = np.int64 if ary.dtype.char in "Mm" else ary.dtype
|
309
316
|
return FakeCUDAArray(
|
310
317
|
np.ndarray(
|
311
318
|
buffer=np.copy(array_core(ary)).view(buffer_dtype),
|
@@ -324,22 +331,22 @@ def pinned(arg):
|
|
324
331
|
|
325
332
|
|
326
333
|
def mapped_array(*args, **kwargs):
|
327
|
-
for unused_arg in (
|
334
|
+
for unused_arg in ("portable", "wc"):
|
328
335
|
if unused_arg in kwargs:
|
329
336
|
kwargs.pop(unused_arg)
|
330
337
|
return device_array(*args, **kwargs)
|
331
338
|
|
332
339
|
|
333
|
-
def pinned_array(shape, dtype=np.float64, strides=None, order=
|
340
|
+
def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
|
334
341
|
return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order)
|
335
342
|
|
336
343
|
|
337
|
-
def managed_array(shape, dtype=np.float64, strides=None, order=
|
344
|
+
def managed_array(shape, dtype=np.float64, strides=None, order="C"):
|
338
345
|
return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order)
|
339
346
|
|
340
347
|
|
341
348
|
def device_array(*args, **kwargs):
|
342
|
-
stream = kwargs.pop(
|
349
|
+
stream = kwargs.pop("stream") if "stream" in kwargs else 0
|
343
350
|
return FakeCUDAArray(np.ndarray(*args, **kwargs), stream=stream)
|
344
351
|
|
345
352
|
|
@@ -350,7 +357,7 @@ def _contiguous_strides_like_array(ary):
|
|
350
357
|
"""
|
351
358
|
# Don't recompute strides if the default strides will be sufficient to
|
352
359
|
# create a contiguous array.
|
353
|
-
if ary.flags[
|
360
|
+
if ary.flags["C_CONTIGUOUS"] or ary.flags["F_CONTIGUOUS"] or ary.ndim <= 1:
|
354
361
|
return None
|
355
362
|
|
356
363
|
# Otherwise, we need to compute new strides using an algorithm adapted from
|
@@ -360,7 +367,7 @@ def _contiguous_strides_like_array(ary):
|
|
360
367
|
|
361
368
|
# Stride permutation. E.g. a stride array (4, -2, 12) becomes
|
362
369
|
# [(1, -2), (0, 4), (2, 12)]
|
363
|
-
strideperm = [
|
370
|
+
strideperm = [x for x in enumerate(ary.strides)]
|
364
371
|
strideperm.sort(key=lambda x: x[1])
|
365
372
|
|
366
373
|
# Compute new strides using permutation
|
@@ -373,24 +380,26 @@ def _contiguous_strides_like_array(ary):
|
|
373
380
|
|
374
381
|
|
375
382
|
def _order_like_array(ary):
|
376
|
-
if ary.flags[
|
377
|
-
return
|
383
|
+
if ary.flags["F_CONTIGUOUS"] and not ary.flags["C_CONTIGUOUS"]:
|
384
|
+
return "F"
|
378
385
|
else:
|
379
|
-
return
|
386
|
+
return "C"
|
380
387
|
|
381
388
|
|
382
389
|
def device_array_like(ary, stream=0):
|
383
390
|
strides = _contiguous_strides_like_array(ary)
|
384
391
|
order = _order_like_array(ary)
|
385
|
-
return device_array(
|
386
|
-
|
392
|
+
return device_array(
|
393
|
+
shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
|
394
|
+
)
|
387
395
|
|
388
396
|
|
389
397
|
def pinned_array_like(ary):
|
390
398
|
strides = _contiguous_strides_like_array(ary)
|
391
399
|
order = _order_like_array(ary)
|
392
|
-
return pinned_array(
|
393
|
-
|
400
|
+
return pinned_array(
|
401
|
+
shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
|
402
|
+
)
|
394
403
|
|
395
404
|
|
396
405
|
def auto_device(ary, stream=0, copy=True):
|
@@ -399,15 +408,14 @@ def auto_device(ary, stream=0, copy=True):
|
|
399
408
|
|
400
409
|
if not isinstance(ary, np.void):
|
401
410
|
ary = np.array(
|
402
|
-
ary,
|
403
|
-
|
404
|
-
subok=True)
|
411
|
+
ary, copy=False if numpy_version < (2, 0) else None, subok=True
|
412
|
+
)
|
405
413
|
return to_device(ary, stream, copy), True
|
406
414
|
|
407
415
|
|
408
416
|
def is_cuda_ndarray(obj):
|
409
417
|
"Check if an object is a CUDA ndarray"
|
410
|
-
return getattr(obj,
|
418
|
+
return getattr(obj, "__cuda_ndarray__", False)
|
411
419
|
|
412
420
|
|
413
421
|
def verify_cuda_ndarray_interface(obj):
|
@@ -418,15 +426,15 @@ def verify_cuda_ndarray_interface(obj):
|
|
418
426
|
if not hasattr(obj, attr):
|
419
427
|
raise AttributeError(attr)
|
420
428
|
if not isinstance(getattr(obj, attr), typ):
|
421
|
-
raise AttributeError(
|
429
|
+
raise AttributeError("%s must be of type %s" % (attr, typ))
|
422
430
|
|
423
|
-
requires_attr(
|
424
|
-
requires_attr(
|
425
|
-
requires_attr(
|
426
|
-
requires_attr(
|
431
|
+
requires_attr("shape", tuple)
|
432
|
+
requires_attr("strides", tuple)
|
433
|
+
requires_attr("dtype", np.dtype)
|
434
|
+
requires_attr("size", int)
|
427
435
|
|
428
436
|
|
429
437
|
def require_cuda_ndarray(obj):
|
430
438
|
"Raises ValueError is is_cuda_ndarray(obj) evaluates False"
|
431
439
|
if not is_cuda_ndarray(obj):
|
432
|
-
raise ValueError(
|
440
|
+
raise ValueError("require an cuda ndarray object")
|
@@ -8,7 +8,7 @@ _SIMULATOR_CC = (5, 2)
|
|
8
8
|
|
9
9
|
class FakeCUDADevice:
|
10
10
|
def __init__(self):
|
11
|
-
self.uuid =
|
11
|
+
self.uuid = "GPU-00000000-0000-0000-0000-000000000000"
|
12
12
|
|
13
13
|
@property
|
14
14
|
def compute_capability(self):
|
@@ -16,10 +16,11 @@ class FakeCUDADevice:
|
|
16
16
|
|
17
17
|
|
18
18
|
class FakeCUDAContext:
|
19
|
-
|
19
|
+
"""
|
20
20
|
This stub implements functionality only for simulating a single GPU
|
21
21
|
at the moment.
|
22
|
-
|
22
|
+
"""
|
23
|
+
|
23
24
|
def __init__(self, device_id):
|
24
25
|
self._device_id = device_id
|
25
26
|
self._device = FakeCUDADevice()
|
@@ -54,7 +55,7 @@ class FakeCUDAContext:
|
|
54
55
|
dependencies, e.g. `psutil` - so return infinite memory to maintain API
|
55
56
|
type compatibility
|
56
57
|
"""
|
57
|
-
return _MemoryInfo(float(
|
58
|
+
return _MemoryInfo(float("inf"), float("inf"))
|
58
59
|
|
59
60
|
def memalloc(self, sz):
|
60
61
|
"""
|
@@ -62,19 +63,20 @@ class FakeCUDAContext:
|
|
62
63
|
At present, there is no division between simulated
|
63
64
|
host memory and simulated device memory.
|
64
65
|
"""
|
65
|
-
return np.ndarray(sz, dtype=
|
66
|
+
return np.ndarray(sz, dtype="u1")
|
66
67
|
|
67
68
|
def memhostalloc(self, sz, mapped=False, portable=False, wc=False):
|
68
|
-
|
69
|
+
"""Allocates memory on the host"""
|
69
70
|
return self.memalloc(sz)
|
70
71
|
|
71
72
|
|
72
73
|
class FakeDeviceList:
|
73
|
-
|
74
|
+
"""
|
74
75
|
This stub implements a device list containing a single GPU. It also
|
75
76
|
keeps track of the GPU status, i.e. whether the context is closed or not,
|
76
77
|
which may have been set by the user calling reset()
|
77
|
-
|
78
|
+
"""
|
79
|
+
|
78
80
|
def __init__(self):
|
79
81
|
self.lst = (FakeCUDAContext(0),)
|
80
82
|
self.closed = False
|
@@ -84,7 +86,7 @@ class FakeDeviceList:
|
|
84
86
|
return self.lst[devnum]
|
85
87
|
|
86
88
|
def __str__(self):
|
87
|
-
return
|
89
|
+
return ", ".join([str(d) for d in self.lst])
|
88
90
|
|
89
91
|
def __iter__(self):
|
90
92
|
return iter(self.lst)
|
@@ -111,7 +113,7 @@ def get_context(devnum=0):
|
|
111
113
|
|
112
114
|
|
113
115
|
def require_context(func):
|
114
|
-
|
116
|
+
"""
|
115
117
|
In the simulator, a context is always "available", so this is a no-op.
|
116
|
-
|
118
|
+
"""
|
117
119
|
return func
|
@@ -1,15 +1,15 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
Most of the driver API is unsupported in the simulator, but some stubs are
|
3
3
|
provided to allow tests to import correctly.
|
4
|
-
|
4
|
+
"""
|
5
5
|
|
6
6
|
|
7
7
|
def device_memset(dst, val, size, stream=0):
|
8
|
-
dst.view(
|
8
|
+
dst.view("u1")[:size].fill(bytes([val])[0])
|
9
9
|
|
10
10
|
|
11
11
|
def host_to_device(dst, src, size, stream=0):
|
12
|
-
dst.view(
|
12
|
+
dst.view("u1")[:size] = src.view("u1")[:size]
|
13
13
|
|
14
14
|
|
15
15
|
def device_to_host(dst, src, size, stream=0):
|
@@ -55,7 +55,7 @@ class CudaAPIError(RuntimeError):
|
|
55
55
|
|
56
56
|
|
57
57
|
def launch_kernel(*args, **kwargs):
|
58
|
-
msg =
|
58
|
+
msg = "Launching kernels directly is not supported in the simulator"
|
59
59
|
raise RuntimeError(msg)
|
60
60
|
|
61
61
|
|
@@ -1,2 +1,2 @@
|
|
1
1
|
def check_static_lib(lib):
|
2
|
-
raise FileNotFoundError(
|
2
|
+
raise FileNotFoundError("Linking libraries not supported by cudasim")
|
@@ -1,7 +1,7 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
NVVM is not supported in the simulator, but stubs are provided to allow tests
|
3
3
|
to import correctly.
|
4
|
-
|
4
|
+
"""
|
5
5
|
|
6
6
|
|
7
7
|
class NvvmSupportError(ImportError):
|
@@ -10,7 +10,7 @@ class NvvmSupportError(ImportError):
|
|
10
10
|
|
11
11
|
class NVVM(object):
|
12
12
|
def __init__(self):
|
13
|
-
raise NvvmSupportError(
|
13
|
+
raise NvvmSupportError("NVVM not supported in the simulator")
|
14
14
|
|
15
15
|
|
16
16
|
CompilationUnit = None
|
@@ -1,7 +1,7 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
The runtime API is unsupported in the simulator, but some stubs are
|
3
3
|
provided to allow tests to import correctly.
|
4
|
-
|
4
|
+
"""
|
5
5
|
|
6
6
|
|
7
7
|
class FakeRuntime(object):
|
@@ -13,7 +13,7 @@ class FakeRuntime(object):
|
|
13
13
|
|
14
14
|
@property
|
15
15
|
def supported_versions(self):
|
16
|
-
return (-1, -1),
|
16
|
+
return ((-1, -1),)
|
17
17
|
|
18
18
|
|
19
19
|
runtime = FakeRuntime()
|