numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.13.dist-info/LICENSE +25 -0
- numba_cuda-0.0.13.dist-info/METADATA +69 -0
- numba_cuda-0.0.13.dist-info/RECORD +231 -0
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.1.dist-info/METADATA +0 -10
- numba_cuda-0.0.1.dist-info/RECORD +0 -5
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,318 @@
|
|
1
|
+
import multiprocessing as mp
|
2
|
+
import itertools
|
3
|
+
import traceback
|
4
|
+
import pickle
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
from numba import cuda
|
9
|
+
from numba.cuda.cudadrv import driver
|
10
|
+
from numba.cuda.testing import (skip_on_arm, skip_on_cudasim,
|
11
|
+
skip_under_cuda_memcheck,
|
12
|
+
ContextResettingTestCase, ForeignArray)
|
13
|
+
from numba.tests.support import linux_only, windows_only
|
14
|
+
import unittest
|
15
|
+
|
16
|
+
|
17
|
+
def core_ipc_handle_test(the_work, result_queue):
    """Call ``the_work`` and report the outcome on ``result_queue``.

    Exactly one ``(succ, out)`` tuple is put on the queue:

    * ``(True, value)`` where ``value`` is what ``the_work()`` returned, or
    * ``(False, text)`` where ``text`` is the formatted traceback of
      whatever ``the_work()`` raised.
    """
    try:
        outcome = (True, the_work())
    except BaseException:
        # Same reach as a bare ``except``: anything going wrong in the
        # worker function is propagated as a string instead of being raised
        outcome = (False, traceback.format_exc())
    result_queue.put(outcome)
|
30
|
+
|
31
|
+
|
32
|
+
def base_ipc_handle_test(handle, size, result_queue):
    """Worker: open a raw IPC handle as an ``intp`` array and read it back.

    ``handle`` and ``size`` are forwarded to ``cuda.open_ipc_array``; the
    element count is ``size`` divided by the item size of ``np.intp``. The
    host copy of the opened array (or a traceback string on failure) is
    reported through ``core_ipc_handle_test``.
    """
    def the_work():
        intp = np.dtype(np.intp)
        n_items = size // intp.itemsize
        with cuda.open_ipc_array(handle, shape=n_items, dtype=intp) as darr:
            # copy the data to host while the IPC array is still open
            return darr.copy_to_host()

    core_ipc_handle_test(the_work, result_queue)
|
41
|
+
|
42
|
+
|
43
|
+
def serialize_ipc_handle_test(handle, result_queue):
    """Worker: open an IPC handle object as an array and read it back.

    ``handle`` is opened in the current CUDA context as an ``intp`` array
    holding ``handle.size`` bytes, copied to the host and then closed. The
    host copy (or a traceback string on failure) is reported through
    ``core_ipc_handle_test``.
    """
    def the_work():
        dtype = np.dtype(np.intp)
        darr = handle.open_array(cuda.current_context(),
                                 shape=handle.size // dtype.itemsize,
                                 dtype=dtype)
        try:
            # copy the data to host
            return darr.copy_to_host()
        finally:
            # Close the handle even if the copy raises, so that an opened
            # handle is never left behind on the failure path
            handle.close()

    core_ipc_handle_test(the_work, result_queue)
|
55
|
+
|
56
|
+
|
57
|
+
def ipc_array_test(ipcarr, result_queue):
    """Worker: read an IPC array and check that it cannot be opened twice.

    ``ipcarr`` is entered as a context manager and the array it yields is
    copied to the host. While it is still open, entering it a second time
    must raise ``ValueError('IpcHandle is already opened')``. One
    ``(succ, out)`` tuple is put on ``result_queue``: the host copy on
    success, otherwise the formatted traceback.
    """
    try:
        with ipcarr as darr:
            host_copy = darr.copy_to_host()
            try:
                # should fail to reopen
                with ipcarr:
                    pass
            except ValueError as err:
                if str(err) != 'IpcHandle is already opened':
                    raise AssertionError('invalid exception message')
            else:
                raise AssertionError('did not raise on reopen')
        outcome = (True, host_copy)
    except BaseException:
        # Catch any exception so it can be propagated as a string
        outcome = (False, traceback.format_exc())
    result_queue.put(outcome)
|
80
|
+
|
81
|
+
|
82
|
+
@linux_only
@skip_under_cuda_memcheck('Hangs cuda-memcheck')
@skip_on_cudasim('Ipc not available in CUDASIM')
@skip_on_arm('CUDA IPC not supported on ARM in Numba')
class TestIpcMemory(ContextResettingTestCase):
    """Share device memory with a spawned process through CUDA IPC handles.

    Each test uploads a small ``intp`` array, exports an IPC handle for it,
    lets a worker running in a ``spawn``-ed process read the data back and
    compares what the worker returned with the expected host data.
    """

    def _run_worker(self, target, args):
        """Run ``target(*args, result_queue)`` in a spawned process.

        The worker is expected to put a single ``(succ, out)`` tuple on the
        queue. The test fails with ``out`` as the message when ``succ`` is
        false; otherwise ``out`` is returned.
        """
        mpctx = mp.get_context('spawn')
        result_queue = mpctx.Queue()
        proc = mpctx.Process(target=target, args=args + (result_queue,))
        proc.start()
        # Fetch the result first: a process that has put data on a queue
        # may not terminate until that data has been consumed
        succ, out = result_queue.get()
        # Join before checking the result so that a failing check does not
        # leave the worker process unjoined
        proc.join(3)
        if not succ:
            self.fail(out)
        return out

    def test_ipc_handle(self):
        """Pass the raw handle bytes and size to the worker."""
        # prepare data for IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)

        # create IPC handle
        ctx = cuda.current_context()
        ipch = ctx.get_ipc_handle(devarr.gpu_data)

        # manually prepare for serialization as bytes
        if driver.USE_NV_BINDING:
            handle_bytes = ipch.handle.reserved
        else:
            handle_bytes = bytes(ipch.handle)
        size = ipch.size

        # spawn new process for testing
        out = self._run_worker(base_ipc_handle_test, (handle_bytes, size))
        np.testing.assert_equal(arr, out)

    def variants(self):
        """Return every ``(index, foreign)`` combination to be tested."""
        # Test with no slicing and various different slices
        indices = (None, slice(3, None), slice(3, 8), slice(None, 8))
        # Test with a Numba DeviceNDArray, or an array from elsewhere through
        # the CUDA Array Interface
        foreigns = (False, True)
        return itertools.product(indices, foreigns)

    def check_ipc_handle_serialization(self, index_arg=None, foreign=False):
        """Pickle an IPC handle and open it in a spawned process.

        ``index_arg`` optionally slices the device array first; ``foreign``
        wraps it in ``ForeignArray`` and re-imports it with
        ``cuda.as_cuda_array``.
        """
        # prepare data for IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)
        if index_arg is not None:
            devarr = devarr[index_arg]
        if foreign:
            devarr = cuda.as_cuda_array(ForeignArray(devarr))
        expect = devarr.copy_to_host()

        # create IPC handle
        ctx = cuda.current_context()
        ipch = ctx.get_ipc_handle(devarr.gpu_data)

        # a pickle round trip must preserve the size and the handle bytes
        buf = pickle.dumps(ipch)
        ipch_recon = pickle.loads(buf)
        self.assertIs(ipch_recon.base, None)
        self.assertEqual(ipch_recon.size, ipch.size)

        if driver.USE_NV_BINDING:
            self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
        else:
            self.assertEqual(ipch_recon.handle.reserved[:],
                             ipch.handle.reserved[:])

        # spawn new process for testing
        out = self._run_worker(serialize_ipc_handle_test, (ipch,))
        np.testing.assert_equal(expect, out)

    def test_ipc_handle_serialization(self):
        """Run the serialization check for every variant."""
        for index, foreign in self.variants():
            with self.subTest(index=index, foreign=foreign):
                self.check_ipc_handle_serialization(index, foreign)

    def check_ipc_array(self, index_arg=None, foreign=False):
        """Send the result of ``devarr.get_ipc_handle()`` to a worker.

        ``index_arg`` and ``foreign`` have the same meaning as in
        ``check_ipc_handle_serialization``.
        """
        # prepare data for IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)
        # Slice
        if index_arg is not None:
            devarr = devarr[index_arg]
        if foreign:
            devarr = cuda.as_cuda_array(ForeignArray(devarr))
        expect = devarr.copy_to_host()
        ipch = devarr.get_ipc_handle()

        # spawn new process for testing
        out = self._run_worker(ipc_array_test, (ipch,))
        np.testing.assert_equal(expect, out)

    def test_ipc_array(self):
        """Run the IPC array check for every variant."""
        for index, foreign in self.variants():
            with self.subTest(index=index, foreign=foreign):
                self.check_ipc_array(index, foreign)
|
198
|
+
|
199
|
+
|
200
|
+
def staged_ipc_handle_test(handle, device_num, result_queue):
    """Worker: open an IPC handle with ``open_staged`` on a chosen device.

    With ``cuda.gpus[device_num]`` active, ``handle`` is opened through
    ``handle.open_staged``, ``handle.size`` bytes are copied into a host
    ``intp`` array and the handle is closed. The host array (or a traceback
    string on failure) is reported through ``core_ipc_handle_test``.
    """
    def the_work():
        with cuda.gpus[device_num]:
            this_ctx = cuda.devices.get_context()
            deviceptr = handle.open_staged(this_ctx)
            try:
                arrsize = handle.size // np.dtype(np.intp).itemsize
                hostarray = np.zeros(arrsize, dtype=np.intp)
                cuda.driver.device_to_host(
                    hostarray, deviceptr, size=handle.size,
                )
            finally:
                # Close the handle even if the copy raises, so that an
                # opened handle is never left behind on the failure path
                handle.close()
            return hostarray

    core_ipc_handle_test(the_work, result_queue)
|
214
|
+
|
215
|
+
|
216
|
+
def staged_ipc_array_test(ipcarr, device_num, result_queue):
    """Worker: read an IPC array on a chosen device and reject a reopen.

    With ``cuda.gpus[device_num]`` active, ``ipcarr`` is entered as a
    context manager and the array it yields is copied to the host. While it
    is still open, entering it again must raise
    ``ValueError('IpcHandle is already opened')``. One ``(succ, out)`` tuple
    is put on ``result_queue``: the host copy on success, otherwise the
    formatted traceback.
    """
    try:
        with cuda.gpus[device_num]:
            with ipcarr as darr:
                host_copy = darr.copy_to_host()
                try:
                    # should fail to reopen
                    with ipcarr:
                        pass
                except ValueError as err:
                    if str(err) != 'IpcHandle is already opened':
                        raise AssertionError('invalid exception message')
                else:
                    raise AssertionError('did not raise on reopen')
        outcome = (True, host_copy)
    except BaseException:
        # Catch any exception so it can be propagated as a string
        outcome = (False, traceback.format_exc())
    result_queue.put(outcome)
|
240
|
+
|
241
|
+
|
242
|
+
@linux_only
@skip_under_cuda_memcheck('Hangs cuda-memcheck')
@skip_on_cudasim('Ipc not available in CUDASIM')
@skip_on_arm('CUDA IPC not supported on ARM in Numba')
class TestIpcStaged(ContextResettingTestCase):
    """Open IPC handles from a spawned process on each entry of ``cuda.gpus``.
    """

    def test_staged(self):
        """Pickle a handle, then open it with the staged worker per device."""
        # data that will be shared over IPC
        host_data = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(host_data)

        # one spawn context and one result queue serve all workers
        mpctx = mp.get_context('spawn')
        result_queue = mpctx.Queue()

        # export the IPC handle from the current CUDA context
        ipch = cuda.current_context().get_ipc_handle(devarr.gpu_data)

        # a pickle round trip must preserve the handle bytes and the size
        ipch_recon = pickle.loads(pickle.dumps(ipch))
        self.assertIs(ipch_recon.base, None)
        if driver.USE_NV_BINDING:
            self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
        else:
            self.assertEqual(
                ipch_recon.handle.reserved[:],
                ipch.handle.reserved[:]
            )
        self.assertEqual(ipch_recon.size, ipch.size)

        # Test on every CUDA device
        for device_num in range(len(cuda.gpus)):
            worker = mpctx.Process(
                target=staged_ipc_handle_test,
                args=(ipch, device_num, result_queue),
            )
            worker.start()
            succ, out = result_queue.get()
            worker.join(3)
            if not succ:
                self.fail(out)
            np.testing.assert_equal(host_data, out)

    def test_ipc_array(self):
        """Send ``get_ipc_handle()`` of a fresh array to a worker per device.
        """
        for device_num in range(len(cuda.gpus)):
            # data that will be shared over IPC
            host_data = np.random.random(10)
            devarr = cuda.to_device(host_data)
            ipch = devarr.get_ipc_handle()

            # spawn new process for testing
            mpctx = mp.get_context('spawn')
            result_queue = mpctx.Queue()
            worker = mpctx.Process(
                target=staged_ipc_array_test,
                args=(ipch, device_num, result_queue),
            )
            worker.start()
            succ, out = result_queue.get()
            worker.join(3)
            if not succ:
                self.fail(out)
            np.testing.assert_equal(host_data, out)
|
303
|
+
|
304
|
+
|
305
|
+
@windows_only
@skip_on_cudasim('Ipc not available in CUDASIM')
class TestIpcNotSupported(ContextResettingTestCase):
    """Check the error raised when an IPC handle is requested on Windows."""

    def test_unsupported(self):
        """``get_ipc_handle`` must raise ``OSError`` with the known text."""
        devarr = cuda.to_device(np.arange(10, dtype=np.intp))
        with self.assertRaises(OSError) as raises:
            devarr.get_ipc_handle()
        self.assertIn('OS does not support CUDA IPC', str(raises.exception))
|
315
|
+
|
316
|
+
|
317
|
+
# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,99 @@
|
|
1
|
+
from numba import cuda
|
2
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
|
6
|
+
|
7
|
+
class TestIterators(CUDATestCase):
    """``enumerate`` and ``zip`` over arrays inside CUDA kernels.

    Every kernel writes a non-zero code into ``error[0]`` when an iteration
    invariant is violated; the host side asserts that the code is still 0.
    """

    def test_enumerate(self):
        """``enumerate(x)`` yields consecutive indices with matching values.
        """
        @cuda.jit
        def enumerator(x, error):
            seen = 0

            for idx, val in enumerate(x):
                # code 1: index out of step, code 2: wrong value
                if seen != idx:
                    error[0] = 1
                if val != x[idx]:
                    error[0] = 2

                seen += 1

            # code 3: the loop did not visit every element
            if seen != len(x):
                error[0] = 3

        values = np.asarray((10, 9, 8, 7, 6))
        status = np.zeros(1, dtype=np.int32)

        enumerator[1, 1](values, status)
        self.assertEqual(status[0], 0)

    def _test_twoarg_function(self, f):
        """Launch kernel ``f`` on two fixed arrays and expect error code 0.
        """
        first = np.asarray((10, 9, 8, 7, 6))
        second = np.asarray((1, 2, 3, 4, 5))
        status = np.zeros(1, dtype=np.int32)

        f[1, 1](first, second, status)
        self.assertEqual(status[0], 0)

    def test_zip(self):
        """``zip(x, y)`` yields the elements of both arrays in lockstep."""
        @cuda.jit
        def zipper(x, y, error):
            pos = 0

            for xv, yv in zip(x, y):
                # code 1: wrong x value, code 2: wrong y value
                if xv != x[pos]:
                    error[0] = 1
                if yv != y[pos]:
                    error[0] = 2

                pos += 1

            # code 3: the loop did not visit every element
            if pos != len(x):
                error[0] = 3

        self._test_twoarg_function(zipper)

    def test_enumerate_zip(self):
        """``enumerate(zip(x, y))`` keeps the index and both values aligned.
        """
        @cuda.jit
        def enumerator_zipper(x, y, error):
            seen = 0

            for idx, (xv, yv) in enumerate(zip(x, y)):
                # code 1: index out of step, codes 2/3: wrong x/y value
                if idx != seen:
                    error[0] = 1
                if xv != x[idx]:
                    error[0] = 2
                if yv != y[idx]:
                    error[0] = 3

                seen += 1

            # code 4: the loop did not visit every element
            if seen != len(x):
                error[0] = 4

        self._test_twoarg_function(enumerator_zipper)

    def test_zip_enumerate(self):
        """``zip(enumerate(x), y)`` keeps the index and both values aligned.
        """
        @cuda.jit
        def zipper_enumerator(x, y, error):
            seen = 0

            for (idx, xv), yv in zip(enumerate(x), y):
                # code 1: index out of step, codes 2/3: wrong x/y value
                if idx != seen:
                    error[0] = 1
                if xv != x[idx]:
                    error[0] = 2
                if yv != y[idx]:
                    error[0] = 3

                seen += 1

            # code 4: the loop did not visit every element
            if seen != len(x):
                error[0] = 4

        self._test_twoarg_function(zipper_enumerator)
|
96
|
+
|
97
|
+
|
98
|
+
# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""
|
2
|
+
Test basic language features
|
3
|
+
|
4
|
+
"""
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
from numba import cuda, float64
|
8
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
9
|
+
|
10
|
+
|
11
|
+
class TestLang(CUDATestCase):
    """Basic Python language constructs used inside CUDA kernels."""

    def test_enumerate(self):
        """``enumerate`` over a tuple captured from the enclosing scope."""
        tup = (1., 2.5, 3.)

        @cuda.jit("void(float64[:])")
        def foo(a):
            for i, v in enumerate(tup):
                a[i] = v

        a = np.zeros(len(tup))
        foo[1, 1](a)
        self.assertTrue(np.all(a == tup))

    def test_zip(self):
        """``zip`` over two tuples captured from the enclosing scope."""
        t1 = (1, 2, 3)
        t2 = (4.5, 5.6, 6.7)

        @cuda.jit("void(float64[:])")
        def foo(a):
            c = 0
            for i, j in zip(t1, t2):
                c += i + j
            a[0] = c

        a = np.zeros(1)
        foo[1, 1](a)
        b = np.array(t1)
        c = np.array(t2)
        self.assertTrue(np.all(a == (b + c).sum()))

    def test_issue_872(self):
        """
        Ensure that typing and lowering of CUDA kernel API primitives works in
        more than one block. Was originally to ensure that macro expansion
        works for more than one block (issue #872), but macro expansion has
        been replaced by a "proper" implementation of all kernel API
        functions.
        """

        @cuda.jit("void(float64[:,:])")
        def cuda_kernel_api_in_multiple_blocks(ary):
            for i in range(2):
                tx = cuda.threadIdx.x
                for j in range(3):
                    ty = cuda.threadIdx.y
                    sm = cuda.shared.array((2, 3), float64)
                    sm[tx, ty] = 1.0
                    ary[tx, ty] = sm[tx, ty]

        a = np.zeros((2, 3))
        cuda_kernel_api_in_multiple_blocks[1, (2, 3)](a)
        # The launch uses one block of (2, 3) threads and each thread stores
        # 1.0 at its own [tx, ty], so the whole (2, 3) output must be 1.0
        self.assertTrue(np.all(a == 1.0))
|
61
|
+
|
62
|
+
|
63
|
+
# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
@@ -0,0 +1,119 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from numba import cuda, float64, void
|
3
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
4
|
+
from numba.core import config
|
5
|
+
|
6
|
+
# NOTE: CUDA kernel does not return any value
|
7
|
+
|
8
|
+
# Threads per block along each axis: 4 when ENABLE_CUDASIM is set, else 16.
tpb = 4 if config.ENABLE_CUDASIM else 16
# Shape passed to cuda.shared.array for the per-block error tile.
SM_SIZE = (tpb, tpb)
|
13
|
+
|
14
|
+
|
15
|
+
class TestCudaLaplace(CUDATestCase):
    """Iterative relaxation on a small 2D grid, driven from the host."""

    def test_laplace_small(self):
        """Run the relaxation kernel until the error drops or iterations end.

        The kernel writes a four-neighbour average into ``Anew`` for interior
        points and max-reduces the per-thread difference into one value per
        block. The host loop reads that error grid back after every launch
        and swaps the two device arrays.
        """

        @cuda.jit(float64(float64, float64), device=True, inline=True)
        def get_max(a, b):
            return a if a > b else b

        @cuda.jit(void(float64[:, :], float64[:, :], float64[:, :]))
        def jacobi_relax_core(A, Anew, error):
            err_sm = cuda.shared.array(SM_SIZE, dtype=float64)

            # NOTE: ty is taken from threadIdx.x and tx from threadIdx.y
            ty = cuda.threadIdx.x
            tx = cuda.threadIdx.y
            bx = cuda.blockIdx.x
            by = cuda.blockIdx.y

            rows = A.shape[0]
            cols = A.shape[1]

            i, j = cuda.grid(2)

            err_sm[ty, tx] = 0
            if j >= 1 and j < rows - 1 and i >= 1 and i < cols - 1:
                Anew[j, i] = 0.25 * (A[j, i + 1] + A[j, i - 1]
                                     + A[j - 1, i] + A[j + 1, i])
                err_sm[ty, tx] = Anew[j, i] - A[j, i]

            cuda.syncthreads()

            # max-reduce err_sm vertically
            t = tpb // 2
            while t > 0:
                if ty < t:
                    err_sm[ty, tx] = get_max(err_sm[ty, tx],
                                             err_sm[ty + t, tx])
                t //= 2
                cuda.syncthreads()

            # max-reduce err_sm horizontally
            t = tpb // 2
            while t > 0:
                if tx < t and ty == 0:
                    err_sm[ty, tx] = get_max(err_sm[ty, tx],
                                             err_sm[ty, tx + t])
                t //= 2
                cuda.syncthreads()

            # a single thread per block publishes the reduced value
            if tx == 0 and ty == 0:
                error[by, bx] = err_sm[0, 0]

        # a smaller problem and fewer iterations when ENABLE_CUDASIM is set
        if config.ENABLE_CUDASIM:
            NN, NM, iter_max = 4, 4, 20
        else:
            NN, NM, iter_max = 256, 256, 1000

        A = np.zeros((NN, NM), dtype=np.float64)
        Anew = np.zeros((NN, NM), dtype=np.float64)

        tol = 1.0e-6
        error = 1.0

        # the first column of both grids is fixed to 1.0 on every row
        A[:, 0] = 1.0
        Anew[:, 0] = 1.0

        blockdim = (tpb, tpb)
        griddim = (NN // blockdim[0], NM // blockdim[1])

        error_grid = np.zeros(griddim)

        stream = cuda.stream()

        dA = cuda.to_device(A, stream)          # to device and don't come back
        dAnew = cuda.to_device(Anew, stream)    # to device and don't come back
        derror_grid = cuda.to_device(error_grid, stream)

        iteration = 0
        while error > tol and iteration < iter_max:
            self.assertTrue(error_grid.dtype == np.float64)

            jacobi_relax_core[griddim, blockdim, stream](dA, dAnew,
                                                         derror_grid)

            derror_grid.copy_to_host(error_grid, stream=stream)

            # error_grid is available on host
            stream.synchronize()

            error = np.abs(error_grid).max()

            # swap dA and dAnew
            dA, dAnew = dAnew, dA

            iteration += 1
|
116
|
+
|
117
|
+
|
118
|
+
# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|