numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,147 @@
1
+ from __future__ import print_function
2
+
3
+ import numpy as np
4
+
5
+ from numba import config, cuda, int32
6
+ from numba.cuda.testing import (unittest, CUDATestCase, skip_on_cudasim,
7
+ skip_unless_cc_60, skip_if_cudadevrt_missing,
8
+ skip_if_mvc_enabled)
9
+
10
+
11
@cuda.jit
def this_grid(A):
    # Kernel that obtains the grid group handle without using it, then
    # writes to A[0] so the host can tell execution continued past the call.
    cuda.cg.this_grid()
    A[0] = 1.0
15
+
16
+
17
@cuda.jit
def sync_group(A):
    # Kernel that obtains the grid group and synchronizes on it before
    # writing to A[0], so the host can tell the sync call returned.
    g = cuda.cg.this_grid()
    g.sync()
    A[0] = 1.0
22
+
23
+
24
@cuda.jit
def no_sync(A):
    # Kernel that makes no use of cooperative groups: it only stores this
    # thread's 1D grid index into A[0].
    A[0] = cuda.grid(1)
27
+
28
+
29
def sequential_rows(M):
    # Fill M one row at a time, with one thread per column. For every row
    # after the first, a thread stores (value from the previous row at the
    # mirrored column) + 1, then the whole grid synchronizes.
    #
    # Without the grid-wide sync after each row, threads could run ahead of
    # threads in other blocks, or miss the previous-row update made by their
    # mirrored ("opposite") thread, and the result would be wrong.

    grid = cuda.cg.this_grid()
    col = cuda.grid(1)

    n_rows, n_cols = M.shape

    # The mirrored column depends only on this thread's column, so it is
    # computed once rather than on every row.
    mirror = n_cols - col - 1

    for row in range(1, n_rows):
        M[row, col] = M[row - 1, mirror] + 1
        grid.sync()
47
+
48
+
49
@skip_if_cudadevrt_missing
@skip_if_mvc_enabled('CG not supported with MVC')
class TestCudaCooperativeGroups(CUDATestCase):
    """Tests for cooperative groups in CUDA kernels.

    Covers obtaining the grid group, synchronizing on it, whether compiled
    overloads are flagged as cooperative, and the
    ``max_cooperative_grid_blocks`` query.
    """

    @skip_unless_cc_60
    def test_this_grid(self):
        """A kernel calling ``cuda.cg.this_grid()`` runs to completion."""
        A = np.full(1, fill_value=np.nan)
        this_grid[1, 1](A)

        # Ensure the kernel executed beyond the call to cuda.cg.this_grid()
        self.assertFalse(np.isnan(A[0]), 'Value was not set')

    @skip_unless_cc_60
    @skip_on_cudasim("Simulator doesn't differentiate between normal and "
                     "cooperative kernels")
    def test_this_grid_is_cooperative(self):
        """Every overload of ``this_grid`` is flagged as cooperative."""
        A = np.full(1, fill_value=np.nan)
        this_grid[1, 1](A)

        # this_grid should have been determined to be cooperative
        for overload in this_grid.overloads.values():
            self.assertTrue(overload.cooperative)

    @skip_unless_cc_60
    def test_sync_group(self):
        """A kernel that syncs on the grid group runs to completion."""
        A = np.full(1, fill_value=np.nan)
        sync_group[1, 1](A)

        # Ensure the kernel executed beyond the call to g.sync()
        self.assertFalse(np.isnan(A[0]), 'Value was not set')

    @skip_unless_cc_60
    @skip_on_cudasim("Simulator doesn't differentiate between normal and "
                     "cooperative kernels")
    def test_sync_group_is_cooperative(self):
        """Every overload of ``sync_group`` is flagged as cooperative."""
        A = np.full(1, fill_value=np.nan)
        sync_group[1, 1](A)

        # sync_group should have been determined to be cooperative
        for overload in sync_group.overloads.values():
            self.assertTrue(overload.cooperative)

    @skip_on_cudasim("Simulator does not implement linking")
    def test_false_cooperative_doesnt_link_cudadevrt(self):
        """
        We should only mark a kernel as cooperative and link cudadevrt if the
        kernel uses grid sync. Here we ensure that one that doesn't use grid
        sync isn't marked as such.
        """
        A = np.full(1, fill_value=np.nan)
        no_sync[1, 1](A)

        for overload in no_sync.overloads.values():
            self.assertFalse(overload.cooperative)
            for link in overload._codelibrary._linking_files:
                self.assertNotIn('cudadevrt', link)

    @skip_unless_cc_60
    def test_sync_at_matrix_row(self):
        """Grid sync after each row yields the expected row-index matrix."""
        if config.ENABLE_CUDASIM:
            # Use a small matrix to compute using a single block in a
            # reasonable amount of time
            shape = (32, 32)
        else:
            shape = (1024, 1024)
        A = np.zeros(shape, dtype=np.int32)
        blockdim = 32
        griddim = A.shape[1] // blockdim

        sig = (int32[:, ::1],)
        c_sequential_rows = cuda.jit(sig)(sequential_rows)

        overload = c_sequential_rows.overloads[sig]
        mb = overload.max_cooperative_grid_blocks(blockdim)
        if griddim > mb:
            # skipTest() raises SkipTest and stops the test here. A bare
            # unittest.skip(...) call only builds a decorator and would let
            # the launch below proceed on a GPU that cannot hold the grid.
            self.skipTest("GPU cannot support enough cooperative grid blocks")

        c_sequential_rows[griddim, blockdim](A)

        # Row r is expected to hold the value r in every column
        reference = np.tile(np.arange(shape[0]), (shape[1], 1)).T
        np.testing.assert_equal(A, reference)

    @skip_unless_cc_60
    def test_max_cooperative_grid_blocks(self):
        """Block shape does not affect the cooperative block limit."""
        # The maximum number of blocks will vary based on the device so we
        # can't test for an expected value, but we can check that the function
        # doesn't error, and that varying the number of dimensions of the block
        # whilst keeping the total number of threads constant doesn't change
        # the maximum to validate some of the logic.
        sig = (int32[:, ::1],)
        c_sequential_rows = cuda.jit(sig)(sequential_rows)
        overload = c_sequential_rows.overloads[sig]
        blocks1d = overload.max_cooperative_grid_blocks(256)
        blocks2d = overload.max_cooperative_grid_blocks((16, 16))
        blocks3d = overload.max_cooperative_grid_blocks((16, 4, 4))
        self.assertEqual(blocks1d, blocks2d)
        self.assertEqual(blocks1d, blocks3d)
144
+
145
+
146
+ if __name__ == '__main__':
147
+ unittest.main()
@@ -0,0 +1,435 @@
1
+ import numpy as np
2
+
3
+ from numba import vectorize, guvectorize
4
+ from numba import cuda
5
+ from numba.cuda.cudadrv import driver
6
+ from numba.cuda.testing import unittest, ContextResettingTestCase, ForeignArray
7
+ from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
8
+ from numba.tests.support import linux_only, override_config
9
+ from unittest.mock import call, patch
10
+
11
+
12
@skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
class TestCudaArrayInterface(ContextResettingTestCase):
    """Tests for producing and consuming ``__cuda_array_interface__``."""

    def assertPointersEqual(self, a, b):
        """Assert that device arrays *a* and *b* share a device pointer."""
        if driver.USE_NV_BINDING:
            self.assertEqual(int(a.device_ctypes_pointer),
                             int(b.device_ctypes_pointer))
        else:
            # Without this branch the helper asserted nothing in this mode.
            # NOTE(review): assumes device_ctypes_pointer is a ctypes
            # pointer exposing its address as ``.value`` here, mirroring
            # what get_stream_value() does for stream handles - confirm.
            self.assertEqual(a.device_ctypes_pointer.value,
                             b.device_ctypes_pointer.value)

    def test_as_cuda_array(self):
        h_arr = np.arange(10)
        self.assertFalse(cuda.is_cuda_array(h_arr))
        d_arr = cuda.to_device(h_arr)
        self.assertTrue(cuda.is_cuda_array(d_arr))
        my_arr = ForeignArray(d_arr)
        self.assertTrue(cuda.is_cuda_array(my_arr))
        wrapped = cuda.as_cuda_array(my_arr)
        self.assertTrue(cuda.is_cuda_array(wrapped))
        # Their values must equal the original array
        np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr)
        np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr)
        # d_arr and wrapped must be the same buffer
        self.assertPointersEqual(wrapped, d_arr)

    def get_stream_value(self, stream):
        """Return the handle of *stream* as a plain comparable value."""
        if driver.USE_NV_BINDING:
            return int(stream.handle)
        else:
            return stream.handle.value

    @skip_if_external_memmgr('Ownership not relevant with external memmgr')
    def test_ownership(self):
        # Get the deallocation queue
        ctx = cuda.current_context()
        deallocs = ctx.memory_manager.deallocations
        # Flush all deallocations
        deallocs.clear()
        self.assertEqual(len(deallocs), 0)
        # Make new device array
        d_arr = cuda.to_device(np.arange(100))
        # Convert it
        cvted = cuda.as_cuda_array(d_arr)
        # Drop reference to the original object such that
        # only `cvted` has a reference to it.
        del d_arr
        # There shouldn't be any new deallocations
        self.assertEqual(len(deallocs), 0)
        # Try to access the memory and verify its content
        np.testing.assert_equal(cvted.copy_to_host(), np.arange(100))
        # Drop last reference to the memory
        del cvted
        self.assertEqual(len(deallocs), 1)
        # Flush
        deallocs.clear()

    def test_kernel_arg(self):
        h_arr = np.arange(10)
        d_arr = cuda.to_device(h_arr)
        my_arr = ForeignArray(d_arr)
        wrapped = cuda.as_cuda_array(my_arr)

        @cuda.jit
        def mutate(arr, val):
            i = cuda.grid(1)
            if i >= len(arr):
                return
            arr[i] += val

        val = 7
        mutate.forall(wrapped.size)(wrapped, val)

        # The write must be visible through both the wrapper and the
        # original device array
        np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr + val)
        np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val)

    def test_ufunc_arg(self):
        @vectorize(['f8(f8, f8)'], target='cuda')
        def vadd(a, b):
            return a + b

        # Case 1: use custom array as argument
        h_arr = np.random.random(10)
        arr = ForeignArray(cuda.to_device(h_arr))
        val = 6
        out = vadd(arr, val)
        np.testing.assert_array_equal(out.copy_to_host(), h_arr + val)

        # Case 2: use custom array as return
        out = ForeignArray(cuda.device_array(h_arr.shape))
        returned = vadd(h_arr, val, out=out)
        np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val)

    def test_gufunc_arg(self):
        @guvectorize(['(f8, f8, f8[:])'], '(),()->()', target='cuda')
        def vadd(inp, val, out):
            out[0] = inp + val

        # Case 1: use custom array as argument
        h_arr = np.random.random(10)
        arr = ForeignArray(cuda.to_device(h_arr))
        val = np.float64(7)
        out = vadd(arr, val)
        np.testing.assert_array_equal(out.copy_to_host(), h_arr + val)

        # Case 2: use custom array as return
        out = ForeignArray(cuda.device_array(h_arr.shape))
        returned = vadd(h_arr, val, out=out)
        np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val)
        self.assertPointersEqual(returned, out._arr)

    def test_array_views(self):
        """Views created via array interface support:
            - Slices
            - Strided slices
        """
        h_arr = np.random.random(10)
        c_arr = cuda.to_device(h_arr)

        arr = cuda.as_cuda_array(c_arr)

        # __getitem__ interface accesses expected data

        # Direct views
        np.testing.assert_array_equal(arr.copy_to_host(), h_arr)
        np.testing.assert_array_equal(arr[:].copy_to_host(), h_arr)

        # Slicing
        np.testing.assert_array_equal(arr[:5].copy_to_host(), h_arr[:5])

        # Strided view
        np.testing.assert_array_equal(arr[::2].copy_to_host(), h_arr[::2])

        # View of strided array
        arr_strided = cuda.as_cuda_array(c_arr[::2])
        np.testing.assert_array_equal(arr_strided.copy_to_host(), h_arr[::2])

        # A strided-view-of-array and view-of-strided-array have the same
        # shape, strides, itemsize, and alloc_size
        self.assertEqual(arr[::2].shape, arr_strided.shape)
        self.assertEqual(arr[::2].strides, arr_strided.strides)
        self.assertEqual(arr[::2].dtype.itemsize, arr_strided.dtype.itemsize)
        self.assertEqual(arr[::2].alloc_size, arr_strided.alloc_size)
        self.assertEqual(arr[::2].nbytes,
                         arr_strided.size * arr_strided.dtype.itemsize)

        # __setitem__ interface propagates into external array

        # Writes to a slice
        arr[:5] = np.pi
        np.testing.assert_array_equal(
            c_arr.copy_to_host(),
            np.concatenate((np.full(5, np.pi), h_arr[5:]))
        )

        # Writes to a slice from a view
        arr[:5] = arr[5:]
        np.testing.assert_array_equal(
            c_arr.copy_to_host(),
            np.concatenate((h_arr[5:], h_arr[5:]))
        )

        # Writes through a view
        arr[:] = cuda.to_device(h_arr)
        np.testing.assert_array_equal(c_arr.copy_to_host(), h_arr)

        # Writes to a strided slice
        arr[::2] = np.pi
        np.testing.assert_array_equal(
            c_arr.copy_to_host()[::2],
            np.full(5, np.pi),
        )
        np.testing.assert_array_equal(
            c_arr.copy_to_host()[1::2],
            h_arr[1::2]
        )

    def test_negative_strided_issue(self):
        # issue #3705
        h_arr = np.random.random(10)
        c_arr = cuda.to_device(h_arr)

        def base_offset(orig, sliced):
            # Distance between the data pointers of the two interfaces
            return sliced['data'][0] - orig['data'][0]

        h_ai = h_arr.__array_interface__
        c_ai = c_arr.__cuda_array_interface__

        h_ai_sliced = h_arr[::-1].__array_interface__
        c_ai_sliced = c_arr[::-1].__cuda_array_interface__

        # Check data offset is correct
        self.assertEqual(
            base_offset(h_ai, h_ai_sliced),
            base_offset(c_ai, c_ai_sliced),
        )
        # Check shape and strides are correct
        self.assertEqual(h_ai_sliced['shape'], c_ai_sliced['shape'])
        self.assertEqual(h_ai_sliced['strides'], c_ai_sliced['strides'])

    def test_negative_strided_copy_to_host(self):
        # issue #3705
        h_arr = np.random.random(10)
        c_arr = cuda.to_device(h_arr)
        sliced = c_arr[::-1]
        with self.assertRaises(NotImplementedError) as raises:
            sliced.copy_to_host()
        expected_msg = 'D->H copy not implemented for negative strides'
        self.assertIn(expected_msg, str(raises.exception))

    def test_masked_array(self):
        h_arr = np.random.random(10)
        h_mask = np.random.randint(2, size=10, dtype='bool')
        c_arr = cuda.to_device(h_arr)
        c_mask = cuda.to_device(h_mask)

        # Manually create a masked CUDA Array Interface dictionary
        masked_cuda_array_interface = c_arr.__cuda_array_interface__.copy()
        masked_cuda_array_interface['mask'] = c_mask

        with self.assertRaises(NotImplementedError) as raises:
            cuda.from_cuda_array_interface(masked_cuda_array_interface)
        expected_msg = 'Masked arrays are not supported'
        self.assertIn(expected_msg, str(raises.exception))

    def test_zero_size_array(self):
        # for #4175
        c_arr = cuda.device_array(0)
        self.assertEqual(c_arr.__cuda_array_interface__['data'][0], 0)

        @cuda.jit
        def add_one(arr):
            x = cuda.grid(1)
            N = arr.shape[0]
            if x < N:
                arr[x] += 1

        d_arr = ForeignArray(c_arr)
        add_one[1, 10](d_arr)  # this should pass

    def test_strides(self):
        # for #4175
        # First, test C-contiguous array
        c_arr = cuda.device_array((2, 3, 4))
        self.assertIsNone(c_arr.__cuda_array_interface__['strides'])

        # Second, test non C-contiguous array
        c_arr = c_arr[:, 1, :]
        self.assertIsNotNone(c_arr.__cuda_array_interface__['strides'])

    def test_consuming_strides(self):
        hostarray = np.arange(10).reshape(2, 5)
        devarray = cuda.to_device(hostarray)
        face = devarray.__cuda_array_interface__
        self.assertIsNone(face['strides'])
        got = cuda.from_cuda_array_interface(face).copy_to_host()
        np.testing.assert_array_equal(got, hostarray)
        self.assertTrue(got.flags['C_CONTIGUOUS'])
        # Try non-NULL strides
        face['strides'] = hostarray.strides
        self.assertIsNotNone(face['strides'])
        got = cuda.from_cuda_array_interface(face).copy_to_host()
        np.testing.assert_array_equal(got, hostarray)
        self.assertTrue(got.flags['C_CONTIGUOUS'])

    def test_produce_no_stream(self):
        c_arr = cuda.device_array(10)
        self.assertIsNone(c_arr.__cuda_array_interface__['stream'])

        mapped_arr = cuda.mapped_array(10)
        self.assertIsNone(mapped_arr.__cuda_array_interface__['stream'])

    @linux_only
    def test_produce_managed_no_stream(self):
        managed_arr = cuda.managed_array(10)
        self.assertIsNone(managed_arr.__cuda_array_interface__['stream'])

    def test_produce_stream(self):
        s = cuda.stream()
        c_arr = cuda.device_array(10, stream=s)
        cai_stream = c_arr.__cuda_array_interface__['stream']
        stream_value = self.get_stream_value(s)
        self.assertEqual(stream_value, cai_stream)

        s = cuda.stream()
        mapped_arr = cuda.mapped_array(10, stream=s)
        cai_stream = mapped_arr.__cuda_array_interface__['stream']
        stream_value = self.get_stream_value(s)
        self.assertEqual(stream_value, cai_stream)

    @linux_only
    def test_produce_managed_stream(self):
        s = cuda.stream()
        managed_arr = cuda.managed_array(10, stream=s)
        cai_stream = managed_arr.__cuda_array_interface__['stream']
        stream_value = self.get_stream_value(s)
        self.assertEqual(stream_value, cai_stream)

    def test_consume_no_stream(self):
        # Create a foreign array with no stream
        f_arr = ForeignArray(cuda.device_array(10))

        # Ensure that the imported array has no default stream
        c_arr = cuda.as_cuda_array(f_arr)
        self.assertEqual(c_arr.stream, 0)

    def test_consume_stream(self):
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        # Ensure that an imported array has the stream as its default stream
        c_arr = cuda.as_cuda_array(f_arr)
        self.assertTrue(c_arr.stream.external)
        stream_value = self.get_stream_value(s)
        imported_stream_value = self.get_stream_value(c_arr.stream)
        self.assertEqual(stream_value, imported_stream_value)

    def test_consume_no_sync(self):
        # Create a foreign array with no stream
        f_arr = ForeignArray(cuda.device_array(10))

        with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                          return_value=None) as mock_sync:
            cuda.as_cuda_array(f_arr)

        # Ensure the synchronize method of a stream was not called
        mock_sync.assert_not_called()

    def test_consume_sync(self):
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                          return_value=None) as mock_sync:
            cuda.as_cuda_array(f_arr)

        # Ensure the synchronize method of a stream was called
        mock_sync.assert_called_once_with()

    def test_consume_sync_disabled(self):
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        # Set sync to false before testing. The test suite should generally be
        # run with sync enabled, but stash the old value just in case it is
        # not.
        with override_config('CUDA_ARRAY_INTERFACE_SYNC', False):
            with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                              return_value=None) as mock_sync:
                cuda.as_cuda_array(f_arr)

            # Ensure the synchronize method of a stream was not called
            mock_sync.assert_not_called()

    def test_launch_no_sync(self):
        # Create a foreign array with no stream
        f_arr = ForeignArray(cuda.device_array(10))

        @cuda.jit
        def f(x):
            pass

        with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                          return_value=None) as mock_sync:
            f[1, 1](f_arr)

        # Ensure the synchronize method of a stream was not called
        mock_sync.assert_not_called()

    def test_launch_sync(self):
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        @cuda.jit
        def f(x):
            pass

        with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                          return_value=None) as mock_sync:
            f[1, 1](f_arr)

        # Ensure the synchronize method of a stream was called
        mock_sync.assert_called_once_with()

    def test_launch_sync_two_streams(self):
        # Create two foreign arrays with streams
        s1 = cuda.stream()
        s2 = cuda.stream()
        f_arr1 = ForeignArray(cuda.device_array(10, stream=s1))
        f_arr2 = ForeignArray(cuda.device_array(10, stream=s2))

        @cuda.jit
        def f(x, y):
            pass

        with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                          return_value=None) as mock_sync:
            f[1, 1](f_arr1, f_arr2)

        # Ensure that synchronize was called twice
        mock_sync.assert_has_calls([call(), call()])

    def test_launch_sync_disabled(self):
        # Create two foreign arrays with streams
        s1 = cuda.stream()
        s2 = cuda.stream()
        f_arr1 = ForeignArray(cuda.device_array(10, stream=s1))
        f_arr2 = ForeignArray(cuda.device_array(10, stream=s2))

        with override_config('CUDA_ARRAY_INTERFACE_SYNC', False):
            @cuda.jit
            def f(x, y):
                pass

            with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
                              return_value=None) as mock_sync:
                f[1, 1](f_arr1, f_arr2)

            # Ensure that synchronize was not called
            mock_sync.assert_not_called()
432
+
433
+
434
+ if __name__ == "__main__":
435
+ unittest.main()
@@ -0,0 +1,90 @@
1
+ from numba import cuda
2
+ import numpy as np
3
+ from numba.cuda.testing import CUDATestCase
4
+ from numba.tests.support import override_config
5
+ import unittest
6
+
7
+
8
class TestCudaJitNoTypes(CUDATestCase):
    """
    Tests the jit decorator with no types provided.
    """

    def test_device_array(self):
        # Kernel jitted without a signature, launched on device arrays.
        @cuda.jit
        def foo(x, y):
            i = cuda.grid(1)
            y[i] = x[i]

        x = np.arange(10)
        y = np.empty_like(x)

        dx = cuda.to_device(x)
        dy = cuda.to_device(y)

        # 10 blocks of 1 thread: one thread per element of the 10-element
        # arrays, which is why the kernel has no bounds check.
        foo[10, 1](dx, dy)

        dy.copy_to_host(y)

        # assert_array_equal reports the mismatching elements on failure,
        # unlike assertTrue(np.all(...)).
        np.testing.assert_array_equal(x, y)

    def test_device_jit(self):
        # Device functions jitted without signatures, called from a kernel
        # that is itself jitted without a signature.
        @cuda.jit(device=True)
        def mapper(args):
            a, b, c = args
            return a + b + c

        @cuda.jit(device=True)
        def reducer(a, b):
            return a + b

        @cuda.jit
        def driver(A, B):
            i = cuda.grid(1)
            if i < B.size:
                args = A[i], A[i] + B[i], B[i]
                B[i] = reducer(mapper(args), 1)

        A = np.arange(100, dtype=np.float32)
        B = np.arange(100, dtype=np.float32)

        # Keep the inputs: B is overwritten with the result by the kernel
        Acopy = A.copy()
        Bcopy = B.copy()

        driver[1, 100](A, B)

        # mapper yields A + (A + B) + B and reducer adds 1
        np.testing.assert_allclose(Acopy + Acopy + Bcopy + Bcopy + 1, B)

    def test_device_jit_2(self):
        # The same device function is called with an array element and with
        # an integer literal from a kernel launched on an explicit stream.
        @cuda.jit(device=True)
        def inner(arg):
            return arg + 1

        @cuda.jit
        def outer(argin, argout):
            argout[0] = inner(argin[0]) + inner(2)

        a = np.zeros(1)
        b = np.zeros(1)

        stream = cuda.stream()
        d_a = cuda.to_device(a, stream)
        d_b = cuda.to_device(b, stream)

        outer[1, 1, stream](d_a, d_b)

        d_b.copy_to_host(b, stream)
        # The copy above is queued on the stream; wait for it to finish
        # before reading the result on the host.
        stream.synchronize()

        self.assertEqual(b[0], (a[0] + 1) + (2 + 1))

    def test_jit_debug_simulator(self):
        # Ensure that the jit decorator accepts the debug kwarg when the
        # simulator is in use - see Issue #6615.
        with override_config('ENABLE_CUDASIM', 1):
            @cuda.jit(debug=True)
            def f(x):
                pass
87
+
88
+
89
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()