numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,547 @@
1
+ import itertools
2
+ import numpy as np
3
+ from numba.cuda.cudadrv import devicearray
4
+ from numba import cuda
5
+ from numba.cuda.testing import unittest, CUDATestCase
6
+ from numba.cuda.testing import skip_on_cudasim
7
+
8
+
9
class TestCudaNDArray(CUDATestCase):
    """Tests for device arrays created with ``cuda.device_array`` and
    ``cuda.to_device``.

    Covers the device ndarray interface check, host/device copies, views,
    transposes, contiguity flags and the layout reported by the Numba type
    of a device array.

    Array comparisons use ``np.testing.assert_array_equal`` throughout so
    that a failure reports the mismatching elements and so that a shape
    mismatch is an error rather than something that may broadcast away.
    """

    def test_device_array_interface(self):
        # Run the interface check on arrays from device_array and to_device,
        # including a zero-dimensional array.
        dary = cuda.device_array(shape=100)
        devicearray.verify_cuda_ndarray_interface(dary)

        ary = np.empty(100)
        dary = cuda.to_device(ary)
        devicearray.verify_cuda_ndarray_interface(dary)

        ary = np.asarray(1.234)
        dary = cuda.to_device(ary)
        self.assertEqual(dary.ndim, 0)
        devicearray.verify_cuda_ndarray_interface(dary)

    def test_device_array_from_readonly(self):
        ary = np.arange(100, dtype=np.float32)
        # Make the array readonly
        ary.flags.writeable = False
        self.assertFalse(ary.flags.writeable)
        # Ensure that we can copy the readonly array
        dary = cuda.to_device(ary)
        retr = dary.copy_to_host()
        np.testing.assert_array_equal(retr, ary)

    def test_devicearray_dtype(self):
        dary = cuda.device_array(shape=(100,), dtype="f4")
        self.assertEqual(dary.dtype, np.dtype("f4"))

    def test_devicearray_no_copy(self):
        # Only checks that to_device accepts copy=False without raising;
        # the result is intentionally not inspected.
        array = np.arange(100, dtype=np.float32)
        cuda.to_device(array, copy=False)

    def test_devicearray_shape(self):
        ary = np.arange(2 * 3 * 4).reshape(2, 3, 4)
        dary = cuda.to_device(ary)
        self.assertEqual(ary.shape, dary.shape)
        self.assertEqual(ary.shape[1:], dary.shape[1:])

    def test_devicearray(self):
        # Round trip: the host buffer is zeroed after the upload, so the
        # original values can only come back from the device.
        array = np.arange(100, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        array[:] = 0
        gpumem.copy_to_host(array)

        np.testing.assert_array_equal(array, original)

    def test_stream_bind(self):
        stream = cuda.stream()
        with stream.auto_synchronize():
            arr = cuda.device_array(
                (3, 3),
                dtype=np.float64,
                stream=stream)
            self.assertEqual(arr.bind(stream).stream, stream)
            self.assertEqual(arr.stream, stream)

    def test_len_1d(self):
        ary = np.empty((3,))
        dary = cuda.device_array(3)
        self.assertEqual(len(ary), len(dary))

    def test_len_2d(self):
        ary = np.empty((3, 5))
        dary = cuda.device_array((3, 5))
        self.assertEqual(len(ary), len(dary))

    def test_len_3d(self):
        ary = np.empty((3, 5, 7))
        dary = cuda.device_array((3, 5, 7))
        self.assertEqual(len(ary), len(dary))

    def test_devicearray_partition(self):
        N = 100
        array = np.arange(N, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        left, right = gpumem.split(N // 2)

        array[:] = 0

        # Sanity check: the host buffer really was cleared before the
        # two halves are copied back into it.
        np.testing.assert_array_equal(array, np.zeros_like(array))

        right.copy_to_host(array[N // 2:])
        left.copy_to_host(array[:N // 2])

        np.testing.assert_array_equal(array, original)

    def test_devicearray_replace(self):
        N = 100
        array = np.arange(N, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        # Overwrite the existing device allocation with new host data.
        cuda.to_device(array * 2, to=gpumem)
        gpumem.copy_to_host(array)
        np.testing.assert_array_equal(array, original * 2)

    @skip_on_cudasim('This works in the simulator')
    def test_devicearray_transpose_wrongdim(self):
        gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4, 1))

        with self.assertRaises(NotImplementedError) as e:
            np.transpose(gpumem)

        self.assertEqual(
            "transposing a non-2D DeviceNDArray isn't supported",
            str(e.exception))

    def test_devicearray_transpose_identity(self):
        # any-shape identities should work
        original = np.array(np.arange(24)).reshape(3, 4, 2)
        array = np.transpose(cuda.to_device(original),
                             axes=(0, 1, 2)).copy_to_host()
        np.testing.assert_array_equal(array, original)

    def test_devicearray_transpose_duplicatedaxis(self):
        gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4))

        with self.assertRaises(ValueError) as e:
            np.transpose(gpumem, axes=(0, 0))

        # The message differs between the GPU target and the simulator.
        self.assertIn(
            str(e.exception),
            container=[
                'invalid axes list (0, 0)',  # GPU
                'repeated axis in transpose',  # sim
            ])

    def test_devicearray_transpose_wrongaxis(self):
        gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4))

        with self.assertRaises(ValueError) as e:
            np.transpose(gpumem, axes=(0, 2))

        # Any one of the accepted messages is sufficient.
        self.assertIn(
            str(e.exception),
            container=[
                'invalid axes list (0, 2)',  # GPU
                'invalid axis for this array',
                'axis 2 is out of bounds for array of dimension 2',  # sim
            ])

    def test_devicearray_view_ok(self):
        original = np.array(np.arange(12), dtype="i2").reshape(3, 4)
        array = cuda.to_device(original)
        for dtype in ("i4", "u4", "i8", "f8"):
            with self.subTest(dtype=dtype):
                np.testing.assert_array_equal(
                    array.view(dtype).copy_to_host(),
                    original.view(dtype)
                )

    def test_devicearray_view_ok_not_c_contig(self):
        # A same-itemsize view of a non-C-contiguous slice is expected
        # to match NumPy's view of the equivalent host slice.
        original = np.array(np.arange(32), dtype="i2").reshape(4, 8)
        array = cuda.to_device(original)[:, ::2]
        original = original[:, ::2]
        np.testing.assert_array_equal(
            array.view("u2").copy_to_host(),
            original.view("u2")
        )

    def test_devicearray_view_bad_not_c_contig(self):
        original = np.array(np.arange(32), dtype="i2").reshape(4, 8)
        array = cuda.to_device(original)[:, ::2]
        with self.assertRaises(ValueError) as e:
            array.view("i4")

        msg = str(e.exception)
        self.assertIn('To change to a dtype of a different size,', msg)

        # Two wordings of the contiguity requirement are accepted.
        contiguous_pre_np123 = 'the array must be C-contiguous' in msg
        contiguous_post_np123 = 'the last axis must be contiguous' in msg
        self.assertTrue(contiguous_pre_np123 or contiguous_post_np123,
                        'Expected message to mention contiguity')

    def test_devicearray_view_bad_itemsize(self):
        original = np.array(np.arange(12), dtype="i2").reshape(4, 3)
        array = cuda.to_device(original)
        with self.assertRaises(ValueError) as e:
            array.view("i4")
        self.assertEqual(
            "When changing to a larger dtype,"
            " its size must be a divisor of the total size in bytes"
            " of the last axis of the array.",
            str(e.exception))

    def test_devicearray_transpose_ok(self):
        original = np.array(np.arange(12)).reshape(3, 4)
        array = np.transpose(cuda.to_device(original)).copy_to_host()
        np.testing.assert_array_equal(array, original.T)

    def test_devicearray_transpose_T(self):
        original = np.array(np.arange(12)).reshape(3, 4)
        array = cuda.to_device(original).T.copy_to_host()
        np.testing.assert_array_equal(array, original.T)

    def test_devicearray_contiguous_slice(self):
        # memcpys are dumb ranges of bytes, so trying to
        # copy to a non-contiguous range shouldn't work!
        a = np.arange(25).reshape(5, 5, order='F')
        s = np.full(fill_value=5, shape=(5,))

        d = cuda.to_device(a)
        a[2] = s

        # d is in F-order (not C-order), so d[2] is not contiguous
        # (40-byte strides). This means we can't memcpy to it!
        with self.assertRaises(ValueError) as e:
            d[2].copy_to_device(s)
        self.assertEqual(
            devicearray.errmsg_contiguous_buffer,
            str(e.exception))

        # if d[2].copy_to_device(s), then this would pass:
        # self.assertTrue((a == d.copy_to_host()).all())

    def _test_devicearray_contiguous_host_copy(self, a_c, a_f):
        """
        Checks host->device memcpys

        For every source/destination ordering combination, ``original`` is
        uploaded, ``copy`` is then copied over it from the host, and the
        device contents are compared against both inputs.

        :param a_c: C-contiguous host array.
        :param a_f: F-contiguous host array; it is compared for equality
                    against the same device contents as ``a_c``, so both
                    must hold equal values.
        """
        self.assertTrue(a_c.flags.c_contiguous)
        self.assertTrue(a_f.flags.f_contiguous)

        for original, copy in [
            (a_f, a_f),
            (a_f, a_c),
            (a_c, a_f),
            (a_c, a_c),
        ]:
            # Label identifying the ordering combination on failure.
            msg = '%s => %s' % (
                'C' if original.flags.c_contiguous else 'F',
                'C' if copy.flags.c_contiguous else 'F',
            )

            d = cuda.to_device(original)
            d.copy_to_device(copy)
            np.testing.assert_array_equal(d.copy_to_host(), a_c,
                                          err_msg=msg)
            np.testing.assert_array_equal(d.copy_to_host(), a_f,
                                          err_msg=msg)

    def test_devicearray_contiguous_copy_host_3d(self):
        a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
        a_f = np.array(a_c, order='F')
        self._test_devicearray_contiguous_host_copy(a_c, a_f)

    def test_devicearray_contiguous_copy_host_1d(self):
        a_c = np.arange(5)
        a_f = np.array(a_c, order='F')
        self._test_devicearray_contiguous_host_copy(a_c, a_f)

    def test_devicearray_contiguous_copy_device(self):
        a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
        a_f = np.array(a_c, order='F')
        self.assertTrue(a_c.flags.c_contiguous)
        self.assertTrue(a_f.flags.f_contiguous)

        d = cuda.to_device(a_c)

        # Device-to-device copies between different orderings must fail
        # with a message naming both sets of strides.
        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(a_f))
        self.assertEqual(
            "incompatible strides: {} vs. {}".format(a_c.strides, a_f.strides),
            str(e.exception))

        d.copy_to_device(cuda.to_device(a_c))
        np.testing.assert_array_equal(d.copy_to_host(), a_c)

        d = cuda.to_device(a_f)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(a_c))
        self.assertEqual(
            "incompatible strides: {} vs. {}".format(a_f.strides, a_c.strides),
            str(e.exception))

        d.copy_to_device(cuda.to_device(a_f))
        np.testing.assert_array_equal(d.copy_to_host(), a_f)

    def test_devicearray_broadcast_host_copy(self):
        broadsize = 4
        coreshape = (2, 3)
        coresize = np.prod(coreshape)
        core_c = np.arange(coresize).reshape(coreshape, order='C')
        core_f = np.arange(coresize).reshape(coreshape, order='F')
        # Insert the broadcast axis at each position within the core shape.
        for dim in range(len(coreshape)):
            newindex = (slice(None),) * dim + (np.newaxis,)
            broadshape = coreshape[:dim] + (broadsize,) + coreshape[dim:]
            broad_c = np.broadcast_to(core_c[newindex], broadshape)
            broad_f = np.broadcast_to(core_f[newindex], broadshape)
            dbroad_c = cuda.to_device(broad_c)
            dbroad_f = cuda.to_device(broad_f)
            np.testing.assert_array_equal(dbroad_c.copy_to_host(), broad_c)
            np.testing.assert_array_equal(dbroad_f.copy_to_host(), broad_f)
            # Also test copying across different core orderings
            dbroad_c.copy_to_device(broad_f)
            dbroad_f.copy_to_device(broad_c)
            np.testing.assert_array_equal(dbroad_c.copy_to_host(), broad_f)
            np.testing.assert_array_equal(dbroad_f.copy_to_host(), broad_c)

    def test_devicearray_contiguous_host_strided(self):
        # A strided host array is an acceptable source for copy_to_device.
        a_c = np.arange(10)
        d = cuda.to_device(a_c)
        arr = np.arange(20)[::2]
        d.copy_to_device(arr)
        np.testing.assert_array_equal(d.copy_to_host(), arr)

    def test_devicearray_contiguous_device_strided(self):
        # A strided device array is rejected as a copy source.
        d = cuda.to_device(np.arange(20))
        arr = np.arange(20)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(arr)[::2])
        self.assertEqual(
            devicearray.errmsg_contiguous_buffer,
            str(e.exception))

    @skip_on_cudasim('DeviceNDArray class not present in simulator')
    def test_devicearray_relaxed_strides(self):
        # From the reproducer in Issue #6824.

        # Construct a device array that is contiguous even though
        # the strides for the first axis (800) are not equal to
        # the strides * size (10 * 8 = 80) for the previous axis,
        # because the first axis size is 1.
        arr = devicearray.DeviceNDArray((1, 10), (800, 8), np.float64)

        # Ensure we still believe the array to be contiguous because
        # strides checking is relaxed.
        self.assertTrue(arr.flags['C_CONTIGUOUS'])
        self.assertTrue(arr.flags['F_CONTIGUOUS'])

    def test_c_f_contiguity_matches_numpy(self):
        # From the reproducer in Issue #4943.

        shapes = ((1, 4), (4, 1))
        orders = ('C', 'F')

        for shape, order in itertools.product(shapes, orders):
            arr = np.ndarray(shape, order=order)
            d_arr = cuda.to_device(arr)
            self.assertEqual(arr.flags['C_CONTIGUOUS'],
                             d_arr.flags['C_CONTIGUOUS'])
            self.assertEqual(arr.flags['F_CONTIGUOUS'],
                             d_arr.flags['F_CONTIGUOUS'])

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_simple_c(self):
        # C-order 1D array
        a = np.zeros(10, order='C')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_simple_f(self):
        # F-order array that is also C layout.
        a = np.zeros(10, order='F')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_2d_c(self):
        # C-order 2D array
        a = np.zeros((2, 10), order='C')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_2d_f(self):
        # F-order array that can only be F layout
        a = np.zeros((2, 10), order='F')
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'F')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_noncontig_slice_c(self):
        # Non-contiguous slice of C-order array
        a = np.zeros((5, 5), order='C')
        d = cuda.to_device(a)[:, 2]
        self.assertEqual(d._numba_type_.layout, 'A')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_noncontig_slice_f(self):
        # Non-contiguous slice of F-order array
        a = np.zeros((5, 5), order='F')
        d = cuda.to_device(a)[2, :]
        self.assertEqual(d._numba_type_.layout, 'A')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_contig_slice_c(self):
        # Contiguous slice of C-order array
        a = np.zeros((5, 5), order='C')
        d = cuda.to_device(a)[2, :]
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_contig_slice_f(self):
        # Contiguous slice of F-order array - is both C- and F-contiguous, so
        # types as 'C' layout
        a = np.zeros((5, 5), order='F')
        d = cuda.to_device(a)[:, 2]
        self.assertEqual(d._numba_type_.layout, 'C')

    @skip_on_cudasim('Typing not done in the simulator')
    def test_devicearray_typing_order_broadcasted(self):
        # Broadcasted array, similar to that used for passing scalars to ufuncs
        a = np.broadcast_to(np.array([1]), (10,))
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, 'A')

    def test_bug6697(self):
        # Converting a device array with np.asarray must keep its dtype.
        ary = np.arange(10, dtype=np.int16)
        dary = cuda.to_device(ary)
        got = np.asarray(dary)
        self.assertEqual(got.dtype, dary.dtype)

    @skip_on_cudasim('DeviceNDArray class not present in simulator')
    def test_issue_8477(self):
        # Ensure that we can copy a zero-length device array to a zero-length
        # host array when the strides of the device and host arrays differ -
        # this should be possible because the strides are irrelevant when the
        # length is zero. For more info see
        # https://github.com/numba/numba/issues/8477.

        # Create a device array with shape (0,) and strides (8,)
        dev_array = devicearray.DeviceNDArray(shape=(0,), strides=(8,),
                                              dtype=np.int8)

        # Create a host array with shape (0,) and strides (0,)
        host_array = np.ndarray(shape=(0,), strides=(0,), dtype=np.int8)

        # Sanity check for this test - ensure our destination has the strides
        # we expect, because strides can be ignored in some cases by the
        # ndarray constructor - checking here ensures that we haven't failed to
        # account for unexpected behaviour across different versions of NumPy
        self.assertEqual(host_array.strides, (0,))

        # Ensure that the copy succeeds in both directions
        dev_array.copy_to_host(host_array)
        dev_array.copy_to_device(host_array)

        # Ensure that a device-to-device copy also succeeds when the strides
        # differ - one way of doing this is to copy the host array across and
        # use that for copies in both directions.
        dev_array_from_host = cuda.to_device(host_array)
        self.assertEqual(dev_array_from_host.shape, (0,))
        self.assertEqual(dev_array_from_host.strides, (0,))

        dev_array.copy_to_device(dev_array_from_host)
        dev_array_from_host.copy_to_device(dev_array)
457
+
458
+
459
class TestRecarray(CUDATestCase):
    """Checks that the fields of a NumPy record array can be read by
    attribute inside a CUDA kernel."""

    def test_recarray(self):
        # From issue #4111
        field_spec = [
            ("value1", np.int64),
            ("value2", np.float64),
        ]
        records = np.recarray((16,), dtype=field_spec)
        records.value1 = np.arange(records.size, dtype=np.int64)
        records.value2 = np.arange(records.size, dtype=np.float64) / 100

        want_ints = records.value1
        want_floats = records.value2

        def copy_fields(rec, dst1, dst2):
            # One thread per record; threads past the end do nothing.
            idx = cuda.grid(1)
            if idx < rec.size:
                dst1[idx] = rec.value1[idx]
                dst2[idx] = rec.value2[idx]

        have_ints = np.zeros_like(want_ints)
        have_floats = np.zeros_like(want_floats)

        # Launch a single block with one thread per record.
        launch = cuda.jit(copy_fields)[1, records.size]
        launch(records, have_ints, have_floats)

        np.testing.assert_array_equal(want_ints, have_ints)
        np.testing.assert_array_equal(want_floats, have_floats)
484
+
485
+
486
class TestCoreContiguous(CUDATestCase):
    """Compares ``devicearray.is_contiguous`` with the ``C_CONTIGUOUS`` flag
    of ``devicearray.array_core`` for device arrays and strided host views."""

    def _test_against_array_core(self, view):
        # The two ways of determining contiguity must agree for ``view``.
        reported = devicearray.is_contiguous(view)
        core_flag = devicearray.array_core(view).flags['C_CONTIGUOUS']
        self.assertEqual(reported, core_flag)

    def test_device_array_like_1d(self):
        self._test_against_array_core(cuda.device_array(10, order='C'))

    def test_device_array_like_2d(self):
        self._test_against_array_core(cuda.device_array((10, 12), order='C'))

    def test_device_array_like_2d_transpose(self):
        dev = cuda.device_array((10, 12), order='C')
        self._test_against_array_core(dev.T)

    def test_device_array_like_3d(self):
        self._test_against_array_core(
            cuda.device_array((10, 12, 14), order='C'))

    def test_device_array_like_1d_f(self):
        self._test_against_array_core(cuda.device_array(10, order='F'))

    def test_device_array_like_2d_f(self):
        self._test_against_array_core(cuda.device_array((10, 12), order='F'))

    def test_device_array_like_2d_f_transpose(self):
        dev = cuda.device_array((10, 12), order='F')
        self._test_against_array_core(dev.T)

    def test_device_array_like_3d_f(self):
        self._test_against_array_core(
            cuda.device_array((10, 12, 14), order='F'))

    def test_1d_view(self):
        # Every other element of a 1D host array.
        strided = np.zeros(10)[::2]
        self._test_against_array_core(strided)

    def test_1d_view_f(self):
        strided = np.zeros(10, order='F')[::2]
        self._test_against_array_core(strided)

    def test_2d_view(self):
        # Every other row and column of a 2D host array.
        strided = np.zeros((10, 12))[::2, ::2]
        self._test_against_array_core(strided)

    def test_2d_view_f(self):
        strided = np.zeros((10, 12), order='F')[::2, ::2]
        self._test_against_array_core(strided)
544
+
545
+
546
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()