numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,260 @@
1
+ import numpy as np
2
+
3
+ from numba.cuda.testing import unittest, CUDATestCase
4
+ from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim
5
+ from numba import config, cuda
6
+
7
+
8
# The ``*_array_like`` constructors exercised by the tests below. When the
# CUDA simulator is enabled, the mapped variant is left out of the set
# (presumably because the simulator does not provide it - TODO confirm).
ARRAY_LIKE_FUNCTIONS = (
    (cuda.device_array_like, cuda.pinned_array_like)
    if config.ENABLE_CUDASIM
    else (cuda.device_array_like, cuda.mapped_array_like,
          cuda.pinned_array_like)
)
13
+
14
+
15
class TestCudaArray(CUDATestCase):
    """Tests for device array creation and the ``*_array_like`` helpers."""

    def test_gpu_array_zero_length(self):
        """A zero-length array keeps its shape and size over a round trip."""
        x = np.arange(0)
        dx = cuda.to_device(x)
        hx = dx.copy_to_host()
        self.assertEqual(x.shape, dx.shape)
        self.assertEqual(x.size, dx.size)
        self.assertEqual(x.shape, hx.shape)
        self.assertEqual(x.size, hx.size)

    def test_null_shape(self):
        """Zero-dimensional arrays report an empty shape tuple."""
        null_shape = ()
        shape1 = cuda.device_array(()).shape
        shape2 = cuda.device_array_like(np.ndarray(())).shape
        self.assertEqual(shape1, null_shape)
        self.assertEqual(shape2, null_shape)

    def test_gpu_array_strided(self):
        """Launch a kernel on a host view that starts mid-buffer."""

        @cuda.jit('void(double[:])')
        def kernel(x):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i

        x = np.arange(10, dtype=np.double)
        # Reinterpret x's storage as 80 bytes, then view the 72 bytes that
        # start 4 bytes in as 9 doubles.
        y = np.ndarray(shape=10 * 8, buffer=x, dtype=np.byte)
        z = np.ndarray(9, buffer=y[4:-4], dtype=np.double)
        kernel[10, 10](z)
        self.assertTrue(np.allclose(z, list(range(9))))

    def test_gpu_array_interleaved(self):
        """auto_device must reject a non-contiguous (every-other) slice."""

        # Compiled eagerly because of the explicit signature, but never
        # launched; it belongs to the disabled use case described below.
        @cuda.jit('void(double[:], double[:])')
        def copykernel(x, y):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i
                y[i] = i

        x = np.arange(10, dtype=np.double)
        y = x[:-1:2]
        # z = x[1::2]
        # n = y.size
        with self.assertRaises(
            ValueError,
            msg="Should raise exception complaining the "
                "contiguous-ness of the array.",
        ):
            cuda.devicearray.auto_device(y)
        # Should we handle this use case?
        # assert z.size == y.size
        # copykernel[1, n](y, x)
        # print(y, z)
        # assert np.all(y == z)
        # assert np.all(y == list(range(n)))

    def test_auto_device_const(self):
        """auto_device accepts a Python scalar and round-trips its value."""
        d, _ = cuda.devicearray.auto_device(2)
        self.assertTrue(np.all(d.copy_to_host() == np.array(2)))

    def _test_array_like_same(self, like_func, array):
        """
        Tests of *_array_like where shape, strides, dtype, and flags should
        all be equal.
        """
        array_like = like_func(array)
        self.assertEqual(array.shape, array_like.shape)
        self.assertEqual(array.strides, array_like.strides)
        self.assertEqual(array.dtype, array_like.dtype)
        self.assertEqual(array.flags['C_CONTIGUOUS'],
                         array_like.flags['C_CONTIGUOUS'])
        self.assertEqual(array.flags['F_CONTIGUOUS'],
                         array_like.flags['F_CONTIGUOUS'])

    def _test_array_like_same_all(self, array):
        """Run ``_test_array_like_same`` once per function, as sub-tests."""
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, array)

    def test_array_like_1d(self):
        self._test_array_like_same_all(cuda.device_array(10, order='C'))

    def test_array_like_2d(self):
        self._test_array_like_same_all(
            cuda.device_array((10, 12), order='C'))

    def test_array_like_2d_transpose(self):
        # NOTE(review): despite the name, no transpose is applied here, so
        # this currently repeats test_array_like_2d - confirm whether `.T`
        # was intended.
        self._test_array_like_same_all(
            cuda.device_array((10, 12), order='C'))

    def test_array_like_3d(self):
        self._test_array_like_same_all(
            cuda.device_array((10, 12, 14), order='C'))

    def test_array_like_1d_f(self):
        self._test_array_like_same_all(cuda.device_array(10, order='F'))

    def test_array_like_2d_f(self):
        self._test_array_like_same_all(
            cuda.device_array((10, 12), order='F'))

    def test_array_like_2d_f_transpose(self):
        # NOTE(review): despite the name, no transpose is applied here, so
        # this currently repeats test_array_like_2d_f - confirm whether `.T`
        # was intended.
        self._test_array_like_same_all(
            cuda.device_array((10, 12), order='F'))

    def test_array_like_3d_f(self):
        self._test_array_like_same_all(
            cuda.device_array((10, 12, 14), order='F'))

    def _test_array_like_view(self, like_func, view, d_view):
        """
        Tests of device_array_like where the original array is a view - the
        strides should not be equal because a contiguous array is expected.
        """
        nb_like = like_func(d_view)
        self.assertEqual(d_view.shape, nb_like.shape)
        self.assertEqual(d_view.dtype, nb_like.dtype)

        # Use NumPy as a reference for the expected strides
        np_like = np.zeros_like(view)
        self.assertEqual(nb_like.strides, np_like.strides)
        self.assertEqual(nb_like.flags['C_CONTIGUOUS'],
                         np_like.flags['C_CONTIGUOUS'])
        self.assertEqual(nb_like.flags['F_CONTIGUOUS'],
                         np_like.flags['F_CONTIGUOUS'])

    def _test_array_like_view_all(self, view, d_view):
        """Run ``_test_array_like_view`` once per function, as sub-tests."""
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    def test_array_like_1d_view(self):
        shape = 10
        view = np.zeros(shape)[::2]
        d_view = cuda.device_array(shape)[::2]
        self._test_array_like_view_all(view, d_view)

    def test_array_like_1d_view_f(self):
        shape = 10
        view = np.zeros(shape, order='F')[::2]
        d_view = cuda.device_array(shape, order='F')[::2]
        self._test_array_like_view_all(view, d_view)

    def test_array_like_2d_view(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2]
        d_view = cuda.device_array(shape)[::2, ::2]
        self._test_array_like_view_all(view, d_view)

    def test_array_like_2d_view_f(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2]
        d_view = cuda.device_array(shape, order='F')[::2, ::2]
        self._test_array_like_view_all(view, d_view)

    @skip_on_cudasim('Numba and NumPy stride semantics differ for transpose')
    def test_array_like_2d_view_transpose_device(self):
        shape = (10, 12)
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # This is a special case (see issue #4974) because creating the
                # transpose creates a new contiguous allocation with different
                # strides. In this case, rather than comparing against NumPy,
                # we can only compare against expected values.
                like = like_func(d_view)
                self.assertEqual(d_view.shape, like.shape)
                self.assertEqual(d_view.dtype, like.dtype)
                # (40, 8) is the C-contiguous layout of a (6, 5) array of
                # 8-byte items.
                self.assertEqual((40, 8), like.strides)
                self.assertTrue(like.flags['C_CONTIGUOUS'])
                self.assertFalse(like.flags['F_CONTIGUOUS'])

    @skip_unless_cudasim('Numba and NumPy stride semantics differ for '
                         'transpose')
    def test_array_like_2d_view_transpose_simulator(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2].T
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # On the simulator, the transpose has different strides to on a
                # CUDA device (See issue #4974). Here we can compare strides
                # against NumPy as a reference.
                np_like = np.zeros_like(view)
                nb_like = like_func(d_view)
                self.assertEqual(d_view.shape, nb_like.shape)
                self.assertEqual(d_view.dtype, nb_like.dtype)
                self.assertEqual(np_like.strides, nb_like.strides)
                self.assertEqual(np_like.flags['C_CONTIGUOUS'],
                                 nb_like.flags['C_CONTIGUOUS'])
                self.assertEqual(np_like.flags['F_CONTIGUOUS'],
                                 nb_like.flags['F_CONTIGUOUS'])

    def test_array_like_2d_view_f_transpose(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2].T
        d_view = cuda.device_array(shape, order='F')[::2, ::2].T
        self._test_array_like_view_all(view, d_view)

    @skip_on_cudasim('Kernel overloads not created in the simulator')
    def test_issue_4628(self):
        # CUDA Device arrays were reported as always being typed with 'A' order
        # so launching the kernel with a host array and then a device array
        # resulted in two overloads being compiled - one for 'C' order from
        # the host array, and one for 'A' order from the device array. With the
        # resolution of this issue, the order of the device array is also 'C',
        # so after the kernel launches there should only be one overload of
        # the function.
        @cuda.jit
        def func(A, out):
            i = cuda.grid(1)
            out[i] = A[i] * 2

        n = 128
        a = np.ones((n,))
        d_a = cuda.to_device(a)
        result = np.zeros((n,))

        func[1, 128](a, result)
        func[1, 128](d_a, result)

        self.assertEqual(1, len(func.overloads))
257
+
258
+
259
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,201 @@
1
+ import numpy as np
2
+ from collections import namedtuple
3
+
4
+ from numba import cuda
5
+ from numba.cuda.testing import unittest, CUDATestCase
6
+
7
+
8
class TestCudaArrayArg(CUDATestCase):
    """Tests for passing arrays, tuples and namedtuples as kernel arguments."""

    def test_array_ary(self):
        """An array argument can be forwarded to an inlined device function."""

        @cuda.jit('double(double[:],int64)', device=True, inline=True)
        def device_function(a, c):
            return a[c]

        @cuda.jit('void(double[:],double[:])')
        def kernel(x, y):
            i = cuda.grid(1)
            y[i] = device_function(x, i)

        x = np.arange(10, dtype=np.double)
        y = np.zeros_like(x)
        kernel[10, 1](x, y)
        np.testing.assert_array_equal(x, y)

    def test_unituple(self):
        """A homogeneous tuple of ints can be indexed inside a kernel."""
        @cuda.jit
        def f(r, x):
            r[0] = x[0]
            r[1] = x[1]
            r[2] = x[2]

        x = (1, 2, 3)
        r = np.zeros(len(x), dtype=np.int64)
        f[1, 1](r, x)

        np.testing.assert_array_equal(r, x)

    def test_tuple(self):
        """A heterogeneous (int and float) tuple can be indexed in a kernel."""
        @cuda.jit
        def f(r1, r2, x):
            r1[0] = x[0]
            r1[1] = x[1]
            r1[2] = x[2]
            r2[0] = x[3]
            r2[1] = x[4]
            r2[2] = x[5]

        x = (1, 2, 3, 4.5, 5.5, 6.5)
        half = len(x) // 2
        r1 = np.zeros(half, dtype=np.int64)
        r2 = np.zeros(half, dtype=np.float64)
        f[1, 1](r1, r2, x)

        # The first half of x holds the ints, the second half the floats.
        np.testing.assert_array_equal(r1, x[:half])
        np.testing.assert_array_equal(r2, x[half:])

    def test_namedunituple(self):
        """Fields of a homogeneous namedtuple are readable by name."""
        @cuda.jit
        def f(r, x):
            r[0] = x.x
            r[1] = x.y

        Point = namedtuple('Point', ('x', 'y'))
        x = Point(1, 2)
        r = np.zeros(len(x), dtype=np.int64)
        f[1, 1](r, x)

        np.testing.assert_array_equal(r, [x.x, x.y])

    def test_namedtuple(self):
        """Fields of a heterogeneous namedtuple are readable by name."""
        @cuda.jit
        def f(r1, r2, x):
            r1[0] = x.x
            r1[1] = x.y
            r2[0] = x.r

        Point = namedtuple('Point', ('x', 'y', 'r'))
        x = Point(1, 2, 2.236)
        r1 = np.zeros(2, dtype=np.int64)
        r2 = np.zeros(1, dtype=np.float64)
        f[1, 1](r1, r2, x)

        np.testing.assert_array_equal(r1, [x.x, x.y])
        np.testing.assert_array_equal(r2, [x.r])

    def test_empty_tuple(self):
        """``len`` of an empty tuple argument is 0 inside a kernel."""
        @cuda.jit
        def f(r, x):
            r[0] = len(x)

        x = tuple()
        # Start from ones so that a kernel which wrote nothing is detected.
        r = np.ones(1, dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], 0)

    def test_tuple_of_empty_tuples(self):
        """A tuple whose members are all empty tuples can be passed."""
        @cuda.jit
        def f(r, x):
            r[0] = len(x)
            r[1] = len(x[0])

        x = ((), (), ())
        r = np.ones(2, dtype=np.int64)
        f[1, 1](r, x)

        np.testing.assert_array_equal(r, [3, 0])

    def test_tuple_of_tuples(self):
        """Nested tuples of differing lengths can be passed and indexed."""
        @cuda.jit
        def f(r, x):
            r[0] = len(x)
            r[1] = len(x[0])
            r[2] = len(x[1])
            r[3] = len(x[2])
            r[4] = x[1][0]
            r[5] = x[1][1]
            r[6] = x[2][0]
            r[7] = x[2][1]
            r[8] = x[2][2]

        x = ((), (5, 6), (8, 9, 10))
        r = np.ones(9, dtype=np.int64)
        f[1, 1](r, x)

        np.testing.assert_array_equal(r, [3, 0, 2, 3, 5, 6, 8, 9, 10])

    def test_tuple_of_tuples_and_scalars(self):
        """A tuple mixing a nested tuple with a scalar can be passed."""
        @cuda.jit
        def f(r, x):
            r[0] = len(x)
            r[1] = len(x[0])
            r[2] = x[0][0]
            r[3] = x[0][1]
            r[4] = x[0][2]
            r[5] = x[1]

        x = ((6, 5, 4), 7)
        # Sized to exactly the six values the kernel writes, so every slot
        # of the result is checked.
        r = np.ones(6, dtype=np.int64)
        f[1, 1](r, x)

        np.testing.assert_array_equal(r, [2, 3, 6, 5, 4, 7])

    def test_tuple_of_arrays(self):
        """Arrays inside a tuple argument can be read and written."""
        @cuda.jit
        def f(x):
            i = cuda.grid(1)
            if i < len(x[0]):
                x[0][i] = x[1][i] + x[2][i]

        N = 10
        x0 = np.zeros(N)
        x1 = np.ones_like(x0)
        x2 = x1 * 3
        x = (x0, x1, x2)
        f[1, N](x)

        np.testing.assert_equal(x0, x1 + x2)

    def test_tuple_of_array_scalar_tuple(self):
        """A tuple holding an array, a scalar and a tuple can be passed."""
        @cuda.jit
        def f(r, x):
            r[0] = x[0][0]
            r[1] = x[0][1]
            r[2] = x[1]
            r[3] = x[2][0]
            r[4] = x[2][1]

        z = np.arange(2, dtype=np.int64)
        x = (2 * z, 10, (4, 3))
        r = np.zeros(5, dtype=np.int64)
        f[1, 1](r, x)

        np.testing.assert_array_equal(r, [0, 2, 10, 4, 3])
198
+
199
+
200
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,35 @@
1
+ import numpy as np
2
+ from numba import cuda
3
+ from numba.cuda.testing import CUDATestCase
4
+ import unittest
5
+
6
+
7
def reinterpret_array_type(byte_arr, start, stop, output):
    """Store the first int32 found in ``byte_arr[start:stop]`` in output[0].

    The byte slice is reinterpreted in place as int32 via ``view``. Intended
    to be launched with a single thread, since every thread would write the
    same output slot.
    """
    output[0] = byte_arr[start:stop].view(np.int32)[0]
11
+
12
+
13
class TestCudaArrayMethods(CUDATestCase):
    """Tests for array methods used inside CUDA kernels."""

    def test_reinterpret_array_type(self):
        """
        Reinterpret byte array as int32 in the GPU.
        """
        kernel = cuda.jit(reinterpret_array_type)

        byte_arr = np.arange(256, dtype=np.uint8)
        word_size = np.dtype(np.int32).itemsize

        # Walk the buffer one int32-sized window at a time and compare the
        # kernel's reinterpretation with NumPy's on the host.
        for begin in range(0, 256, word_size):
            end = begin + word_size
            expected = byte_arr[begin:end].view(np.int32)[0]

            result = np.zeros(1, dtype=np.int32)
            kernel[1, 1](byte_arr, begin, end, result)

            self.assertEqual(expected, result[0])
32
+
33
+
34
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()