numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,318 @@
1
+ import multiprocessing as mp
2
+ import itertools
3
+ import traceback
4
+ import pickle
5
+
6
+ import numpy as np
7
+
8
+ from numba import cuda
9
+ from numba.cuda.cudadrv import driver
10
+ from numba.cuda.testing import (skip_on_arm, skip_on_cudasim,
11
+ skip_under_cuda_memcheck,
12
+ ContextResettingTestCase, ForeignArray)
13
+ from numba.tests.support import linux_only, windows_only
14
+ import unittest
15
+
16
+
17
def core_ipc_handle_test(the_work, result_queue):
    """Run ``the_work`` and report its outcome on ``result_queue``.

    Exactly one ``(succ, out)`` tuple is put on the queue:

    * ``(True, value)`` where ``value`` is what ``the_work()`` returned, or
    * ``(False, text)`` where ``text`` is the formatted traceback of whatever
      ``the_work()`` raised.
    """
    try:
        outcome = (True, the_work())
    # Deliberately bare: anything going wrong in the worker function must be
    # reported back as a string rather than lost.
    except:  # noqa: E722
        outcome = (False, traceback.format_exc())
    result_queue.put(outcome)
30
+
31
+
32
def base_ipc_handle_test(handle, size, result_queue):
    """Open an IPC array from ``handle`` and send its contents back.

    ``size`` is divided by the item size of ``np.intp`` to obtain the shape
    passed to ``cuda.open_ipc_array``.  The host copy (or a traceback string
    on failure) is reported through ``core_ipc_handle_test``.
    """
    def the_work():
        elem_type = np.dtype(np.intp)
        n_elems = size // elem_type.itemsize
        opened = cuda.open_ipc_array(handle, shape=n_elems, dtype=elem_type)
        with opened as darr:
            # Bring the data over to the host while the array is still open.
            host_copy = darr.copy_to_host()
        return host_copy

    core_ipc_handle_test(the_work, result_queue)
41
+
42
+
43
def serialize_ipc_handle_test(handle, result_queue):
    """Open the array behind ``handle`` and send its contents back.

    ``handle`` is opened in the current CUDA context as a 1-D ``np.intp``
    array of ``handle.size // itemsize`` elements.  The host copy (or a
    traceback string on failure) is reported through
    ``core_ipc_handle_test``.
    """
    def the_work():
        dtype = np.dtype(np.intp)
        darr = handle.open_array(cuda.current_context(),
                                 shape=handle.size // dtype.itemsize,
                                 dtype=dtype)
        # The handle was opened successfully at this point, so make sure it
        # is closed again even if the copy to host fails.
        try:
            return darr.copy_to_host()
        finally:
            handle.close()

    core_ipc_handle_test(the_work, result_queue)
55
+
56
+
57
def ipc_array_test(ipcarr, result_queue):
    """Open ``ipcarr``, copy it to the host and verify it cannot be reopened.

    While ``ipcarr`` is open, a second ``with ipcarr`` is attempted; it is
    expected to raise ``ValueError('IpcHandle is already opened')``.  One
    ``(succ, out)`` tuple is put on ``result_queue``: the host copy on
    success, or the formatted traceback on any failure.
    """
    try:
        with ipcarr as darr:
            host_copy = darr.copy_to_host()
            reopen_rejected = False
            try:
                # An already opened handle must refuse a second open.
                with ipcarr:
                    pass
            except ValueError as err:
                reopen_rejected = True
                if str(err) != 'IpcHandle is already opened':
                    raise AssertionError('invalid exception message')
            if not reopen_rejected:
                raise AssertionError('did not raise on reopen')
    # Deliberately bare so that every failure is propagated as a string.
    except:  # noqa: E722
        outcome = (False, traceback.format_exc())
    else:
        outcome = (True, host_copy)
    result_queue.put(outcome)
80
+
81
+
82
@linux_only
@skip_under_cuda_memcheck('Hangs cuda-memcheck')
@skip_on_cudasim('Ipc not available in CUDASIM')
@skip_on_arm('CUDA IPC not supported on ARM in Numba')
class TestIpcMemory(ContextResettingTestCase):
    """Share device memory with a spawned child process through CUDA IPC.

    Every test allocates a small ``np.intp`` device array in this process,
    hands an IPC handle to a child started with the ``spawn`` method, and
    compares what the child copied back to the host with the expected data.
    """

    def _check_in_child(self, target, leading_args, expect):
        """Run ``target`` in a spawned process and verify what it reports.

        ``target`` is called with ``leading_args`` followed by a result
        queue, on which it must put one ``(succ, out)`` tuple.  The test
        fails with ``out`` as the message when ``succ`` is false; otherwise
        ``out`` is compared against ``expect``.
        """
        mpctx = mp.get_context('spawn')
        result_queue = mpctx.Queue()
        proc = mpctx.Process(target=target,
                             args=tuple(leading_args) + (result_queue,))
        proc.start()
        succ, out = result_queue.get()
        # Join before asserting so that a failing check does not skip the
        # join of the child process.
        proc.join(3)
        if not succ:
            self.fail(out)
        np.testing.assert_equal(expect, out)

    def test_ipc_handle(self):
        """Pass the raw handle bytes and size to the child."""
        # prepare data for IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)

        # create IPC handle
        ipch = cuda.current_context().get_ipc_handle(devarr.gpu_data)

        # manually prepare for serialization as bytes
        if driver.USE_NV_BINDING:
            handle_bytes = ipch.handle.reserved
        else:
            handle_bytes = bytes(ipch.handle)

        self._check_in_child(base_ipc_handle_test,
                             (handle_bytes, ipch.size), arr)

    def variants(self):
        """Return every (index, foreign) combination exercised by the tests."""
        # Test with no slicing and various different slices
        indices = (None, slice(3, None), slice(3, 8), slice(None, 8))
        # Test with a Numba DeviceNDArray, or an array from elsewhere through
        # the CUDA Array Interface
        foreigns = (False, True)
        return itertools.product(indices, foreigns)

    def check_ipc_handle_serialization(self, index_arg=None, foreign=False):
        """Pickle an IPC handle, check the round trip, then open it remotely.

        ``index_arg`` optionally slices the device array first; ``foreign``
        wraps it in ``ForeignArray`` and re-imports it with
        ``cuda.as_cuda_array``.
        """
        # prepare data for IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)
        if index_arg is not None:
            devarr = devarr[index_arg]
        if foreign:
            devarr = cuda.as_cuda_array(ForeignArray(devarr))
        expect = devarr.copy_to_host()

        # create IPC handle
        ipch = cuda.current_context().get_ipc_handle(devarr.gpu_data)

        # pickle
        buf = pickle.dumps(ipch)
        ipch_recon = pickle.loads(buf)
        self.assertIs(ipch_recon.base, None)
        self.assertEqual(ipch_recon.size, ipch.size)

        if driver.USE_NV_BINDING:
            self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
        else:
            self.assertEqual(ipch_recon.handle.reserved[:],
                             ipch.handle.reserved[:])

        self._check_in_child(serialize_ipc_handle_test, (ipch,), expect)

    def test_ipc_handle_serialization(self):
        for index, foreign in self.variants():
            with self.subTest(index=index, foreign=foreign):
                self.check_ipc_handle_serialization(index, foreign)

    def check_ipc_array(self, index_arg=None, foreign=False):
        """Send the handle from ``get_ipc_handle()`` to ``ipc_array_test``.

        ``index_arg`` and ``foreign`` have the same meaning as in
        ``check_ipc_handle_serialization``.
        """
        # prepare data for IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)
        # Slice
        if index_arg is not None:
            devarr = devarr[index_arg]
        if foreign:
            devarr = cuda.as_cuda_array(ForeignArray(devarr))
        expect = devarr.copy_to_host()
        ipch = devarr.get_ipc_handle()

        self._check_in_child(ipc_array_test, (ipch,), expect)

    def test_ipc_array(self):
        for index, foreign in self.variants():
            with self.subTest(index=index, foreign=foreign):
                self.check_ipc_array(index, foreign)
198
+
199
+
200
def staged_ipc_handle_test(handle, device_num, result_queue):
    """Open ``handle`` with ``open_staged`` on device ``device_num``.

    The memory behind the handle is copied into a freshly allocated
    ``np.intp`` host array of ``handle.size // itemsize`` elements, which is
    reported (or a traceback string on failure) through
    ``core_ipc_handle_test``.
    """
    def the_work():
        with cuda.gpus[device_num]:
            this_ctx = cuda.devices.get_context()
            deviceptr = handle.open_staged(this_ctx)
            # The handle is open from here on; close it even if the copy
            # below fails.
            try:
                arrsize = handle.size // np.dtype(np.intp).itemsize
                hostarray = np.zeros(arrsize, dtype=np.intp)
                cuda.driver.device_to_host(
                    hostarray, deviceptr, size=handle.size,
                )
            finally:
                handle.close()
        return hostarray

    core_ipc_handle_test(the_work, result_queue)
214
+
215
+
216
def staged_ipc_array_test(ipcarr, device_num, result_queue):
    """Open ``ipcarr`` on device ``device_num`` and verify it cannot reopen.

    With ``cuda.gpus[device_num]`` active, ``ipcarr`` is opened and copied to
    the host; a second ``with ipcarr`` is then expected to raise
    ``ValueError('IpcHandle is already opened')``.  One ``(succ, out)`` tuple
    is put on ``result_queue``: the host copy on success, or the formatted
    traceback on any failure.
    """
    try:
        with cuda.gpus[device_num]:
            with ipcarr as darr:
                host_copy = darr.copy_to_host()
                reopen_rejected = False
                try:
                    # An already opened handle must refuse a second open.
                    with ipcarr:
                        pass
                except ValueError as err:
                    reopen_rejected = True
                    if str(err) != 'IpcHandle is already opened':
                        raise AssertionError('invalid exception message')
                if not reopen_rejected:
                    raise AssertionError('did not raise on reopen')
    # Deliberately bare so that every failure is propagated as a string.
    except:  # noqa: E722
        outcome = (False, traceback.format_exc())
    else:
        outcome = (True, host_copy)
    result_queue.put(outcome)
240
+
241
+
242
@linux_only
@skip_under_cuda_memcheck('Hangs cuda-memcheck')
@skip_on_cudasim('Ipc not available in CUDASIM')
@skip_on_arm('CUDA IPC not supported on ARM in Numba')
class TestIpcStaged(ContextResettingTestCase):
    """Open IPC handles from a child process on each available device."""

    def test_staged(self):
        """Round-trip a pickled handle, then open it staged on every device."""
        # Data shared over IPC
        arr = np.arange(10, dtype=np.intp)
        devarr = cuda.to_device(arr)

        # One queue is shared by all the child processes spawned below
        mpctx = mp.get_context('spawn')
        result_queue = mpctx.Queue()

        # IPC handle for the device allocation
        ipch = cuda.current_context().get_ipc_handle(devarr.gpu_data)

        # The handle must survive a pickle round trip
        ipch_recon = pickle.loads(pickle.dumps(ipch))
        self.assertIs(ipch_recon.base, None)
        if driver.USE_NV_BINDING:
            self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
        else:
            self.assertEqual(
                ipch_recon.handle.reserved[:],
                ipch.handle.reserved[:]
            )
        self.assertEqual(ipch_recon.size, ipch.size)

        # Test on every CUDA device
        for device_num in range(len(cuda.gpus)):
            proc = mpctx.Process(
                target=staged_ipc_handle_test,
                args=(ipch, device_num, result_queue),
            )
            proc.start()
            succ, out = result_queue.get()
            proc.join(3)
            if not succ:
                self.fail(out)
            np.testing.assert_equal(arr, out)

    def test_ipc_array(self):
        """Open the handle from ``get_ipc_handle()`` on every device."""
        for device_num in range(len(cuda.gpus)):
            # Fresh random data for each device
            arr = np.random.random(10)
            devarr = cuda.to_device(arr)
            ipch = devarr.get_ipc_handle()

            # Child process that opens the handle
            mpctx = mp.get_context('spawn')
            result_queue = mpctx.Queue()
            proc = mpctx.Process(
                target=staged_ipc_array_test,
                args=(ipch, device_num, result_queue),
            )
            proc.start()
            succ, out = result_queue.get()
            proc.join(3)
            if not succ:
                self.fail(out)
            np.testing.assert_equal(arr, out)
303
+
304
+
305
@windows_only
@skip_on_cudasim('Ipc not available in CUDASIM')
class TestIpcNotSupported(ContextResettingTestCase):
    """Requesting an IPC handle is expected to fail with ``OSError`` here."""

    def test_unsupported(self):
        devarr = cuda.to_device(np.arange(10, dtype=np.intp))
        with self.assertRaises(OSError) as caught:
            devarr.get_ipc_handle()
        self.assertIn('OS does not support CUDA IPC', str(caught.exception))
315
+
316
+
317
+ if __name__ == '__main__':
318
+ unittest.main()
@@ -0,0 +1,99 @@
1
+ from numba import cuda
2
+ from numba.cuda.testing import unittest, CUDATestCase
3
+
4
+ import numpy as np
5
+
6
+
7
class TestIterators(CUDATestCase):
    """Exercise ``enumerate`` and ``zip`` over arrays inside CUDA kernels.

    Each kernel signals a problem by storing a non-zero code in ``error[0]``;
    the host side asserts that the code is still zero after the launch.
    """

    def test_enumerate(self):
        @cuda.jit
        def enumerator(x, error):
            seen = 0

            for idx, val in enumerate(x):
                # The index must advance in step with our own counter ...
                if idx != seen:
                    error[0] = 1
                # ... and the value must be the element at that index.
                if x[idx] != val:
                    error[0] = 2

                seen += 1

            # Every element must have been visited.
            if seen != len(x):
                error[0] = 3

        values = np.asarray((10, 9, 8, 7, 6))
        error = np.zeros(1, dtype=np.int32)

        enumerator[1, 1](values, error)
        self.assertEqual(error[0], 0)

    def _test_twoarg_function(self, f):
        """Launch kernel ``f`` on two 5-element arrays and expect no error."""
        first = np.asarray((10, 9, 8, 7, 6))
        second = np.asarray((1, 2, 3, 4, 5))
        error = np.zeros(1, dtype=np.int32)

        f[1, 1](first, second, error)
        self.assertEqual(error[0], 0)

    def test_zip(self):
        @cuda.jit
        def zipper(x, y, error):
            pos = 0

            for x_val, y_val in zip(x, y):
                if x[pos] != x_val:
                    error[0] = 1
                if y[pos] != y_val:
                    error[0] = 2

                pos += 1

            if pos != len(x):
                error[0] = 3

        self._test_twoarg_function(zipper)

    def test_enumerate_zip(self):
        @cuda.jit
        def enumerator_zipper(x, y, error):
            seen = 0

            for idx, (x_val, y_val) in enumerate(zip(x, y)):
                if seen != idx:
                    error[0] = 1
                if x[idx] != x_val:
                    error[0] = 2
                if y[idx] != y_val:
                    error[0] = 3

                seen += 1

            if seen != len(x):
                error[0] = 4

        self._test_twoarg_function(enumerator_zipper)

    def test_zip_enumerate(self):
        @cuda.jit
        def zipper_enumerator(x, y, error):
            seen = 0

            for (idx, x_val), y_val in zip(enumerate(x), y):
                if seen != idx:
                    error[0] = 1
                if x[idx] != x_val:
                    error[0] = 2
                if y[idx] != y_val:
                    error[0] = 3

                seen += 1

            if seen != len(x):
                error[0] = 4

        self._test_twoarg_function(zipper_enumerator)
96
+
97
+
98
+ if __name__ == '__main__':
99
+ unittest.main()
@@ -0,0 +1,64 @@
1
+ """
2
+ Test basic language features
3
+
4
+ """
5
+
6
+ import numpy as np
7
+ from numba import cuda, float64
8
+ from numba.cuda.testing import unittest, CUDATestCase
9
+
10
+
11
class TestLang(CUDATestCase):
    """Basic language features used inside CUDA kernels."""

    def test_enumerate(self):
        """``enumerate`` over a tuple captured from the enclosing scope."""
        values = (1., 2.5, 3.)

        @cuda.jit("void(float64[:])")
        def foo(a):
            for idx, val in enumerate(values):
                a[idx] = val

        out = np.zeros(len(values))
        foo[1, 1](out)
        self.assertTrue(np.all(out == values))

    def test_zip(self):
        """``zip`` over two captured tuples, accumulating the pairwise sums."""
        ints = (1, 2, 3)
        floats = (4.5, 5.6, 6.7)

        @cuda.jit("void(float64[:])")
        def foo(a):
            total = 0
            for left, right in zip(ints, floats):
                total += left + right
            a[0] = total

        out = np.zeros(1)
        foo[1, 1](out)
        expected = (np.array(ints) + np.array(floats)).sum()
        self.assertTrue(np.all(out == expected))

    def test_issue_872(self):
        '''
        Ensure that typing and lowering of CUDA kernel API primitives works in
        more than one block. Was originally to ensure that macro expansion works
        for more than one block (issue #872), but macro expansion has been
        replaced by a "proper" implementation of all kernel API functions.
        '''

        @cuda.jit("void(float64[:,:])")
        def cuda_kernel_api_in_multiple_blocks(ary):
            # The kernel API calls are placed inside nested loops on purpose.
            for _outer in range(2):
                tx = cuda.threadIdx.x
                for _inner in range(3):
                    ty = cuda.threadIdx.y
                    sm = cuda.shared.array((2, 3), float64)
                    sm[tx, ty] = 1.0
                    ary[tx, ty] = sm[tx, ty]

        target = np.zeros((2, 3))
        cuda_kernel_api_in_multiple_blocks[1, (2, 3)](target)
61
+
62
+
63
+ if __name__ == '__main__':
64
+ unittest.main()
@@ -0,0 +1,119 @@
1
+ import numpy as np
2
+ from numba import cuda, float64, void
3
+ from numba.cuda.testing import unittest, CUDATestCase
4
+ from numba.core import config
5
+
6
+ # NOTE: CUDA kernel does not return any value
7
+
8
# Threads per block along each axis; a smaller value is used under the
# simulator.
tpb = 4 if config.ENABLE_CUDASIM else 16
# Shape of the per-block shared-memory array used by the kernel.
SM_SIZE = (tpb, tpb)
13
+
14
+
15
class TestCudaLaplace(CUDATestCase):
    """Iterative relaxation on a 2-D grid with a per-block max reduction."""

    def test_laplace_small(self):
        """Iterate the kernel until the error is within tolerance or the
        iteration limit is reached."""

        @cuda.jit(float64(float64, float64), device=True, inline=True)
        def get_max(a, b):
            if a > b:
                return a
            else:
                return b

        @cuda.jit(void(float64[:, :], float64[:, :], float64[:, :]))
        def jocabi_relax_core(A, Anew, error):
            err_sm = cuda.shared.array(SM_SIZE, dtype=float64)

            # NOTE(review): ``ty`` is taken from threadIdx.x and ``tx`` from
            # threadIdx.y, exactly as in the original code.
            ty = cuda.threadIdx.x
            tx = cuda.threadIdx.y
            bx = cuda.blockIdx.x
            by = cuda.blockIdx.y

            n = A.shape[0]
            m = A.shape[1]

            i, j = cuda.grid(2)

            err_sm[ty, tx] = 0
            # Only interior points are updated.
            if 1 <= j < n - 1 and 1 <= i < m - 1:
                Anew[j, i] = 0.25 * (A[j, i + 1] + A[j, i - 1]
                                     + A[j - 1, i] + A[j + 1, i])
                err_sm[ty, tx] = Anew[j, i] - A[j, i]

            cuda.syncthreads()

            # max-reduce err_sm vertically
            t = tpb // 2
            while t > 0:
                if ty < t:
                    err_sm[ty, tx] = get_max(err_sm[ty, tx],
                                             err_sm[ty + t, tx])
                t //= 2
                cuda.syncthreads()

            # max-reduce err_sm horizontally
            t = tpb // 2
            while t > 0:
                if tx < t and ty == 0:
                    err_sm[ty, tx] = get_max(err_sm[ty, tx],
                                             err_sm[ty, tx + t])
                t //= 2
                cuda.syncthreads()

            # One thread per block publishes the block's result.
            if tx == 0 and ty == 0:
                error[by, bx] = err_sm[0, 0]

        # Smaller problem and fewer iterations under the simulator.
        if config.ENABLE_CUDASIM:
            NN, NM = 4, 4
            iter_max = 20
        else:
            NN, NM = 256, 256
            iter_max = 1000

        A = np.zeros((NN, NM), dtype=np.float64)
        Anew = np.zeros((NN, NM), dtype=np.float64)

        tol = 1.0e-6
        error = 1.0

        # First column of both grids is fixed at 1.0.
        A[:, 0] = 1.0
        Anew[:, 0] = 1.0

        iteration = 0

        blockdim = (tpb, tpb)
        griddim = (NN // blockdim[0], NM // blockdim[1])

        error_grid = np.zeros(griddim)

        stream = cuda.stream()

        # A and Anew are sent to the device and never copied back.
        dA = cuda.to_device(A, stream)
        dAnew = cuda.to_device(Anew, stream)
        derror_grid = cuda.to_device(error_grid, stream)

        while error > tol and iteration < iter_max:
            self.assertTrue(error_grid.dtype == np.float64)

            jocabi_relax_core[griddim, blockdim, stream](dA, dAnew,
                                                         derror_grid)

            derror_grid.copy_to_host(error_grid, stream=stream)

            # error_grid is available on the host once the stream is done
            stream.synchronize()

            error = np.abs(error_grid).max()

            # swap dA and dAnew
            dA, dAnew = dAnew, dA

            iteration += 1
116
+
117
+
118
+ if __name__ == '__main__':
119
+ unittest.main()