numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,307 @@
1
+ """
2
+ CUDA vector type tests. Note that this test file imports the
3
+ `cuda.vector_types` module to programmatically test all the
4
+ vector types. However, the `vector_types` module is internal
5
+ and should not be imported by users; users should only import the
6
+ corresponding vector type from the `cuda` module in a kernel to use it.
7
+ """
8
+
9
+ import numpy as np
10
+
11
+ from numba.core import config
12
+ from numba.cuda.testing import CUDATestCase
13
+
14
+ from numba import cuda
15
+
16
+ if config.ENABLE_CUDASIM:
17
+ from numba.cuda.simulator.vector_types import vector_types
18
+ else:
19
+ from numba.cuda.vector_types import vector_types
20
+
21
+
22
def make_kernel(vtype):
    """
    Build a jit-compiled kernel that constructs a vector of type ``vtype``.

    The vector is built from exactly ``vtype.num_elements`` scalars of
    ``vtype.base_type`` (the values 0, 1, 2, ... in component order), and
    each component is then written to the matching slot of the result
    array passed to the kernel.
    """
    vobj = vtype.user_facing_object
    base_type = vtype.base_type

    # One kernel per supported arity. The function names are kept distinct
    # because the selected function is handed to cuda.jit below.

    def kernel_1elem(res):
        vec = vobj(base_type(0))
        res[0] = vec.x

    def kernel_2elem(res):
        vec = vobj(base_type(0), base_type(1))
        res[0] = vec.x
        res[1] = vec.y

    def kernel_3elem(res):
        vec = vobj(base_type(0), base_type(1), base_type(2))
        res[0] = vec.x
        res[1] = vec.y
        res[2] = vec.z

    def kernel_4elem(res):
        vec = vobj(
            base_type(0),
            base_type(1),
            base_type(2),
            base_type(3)
        )
        res[0] = vec.x
        res[1] = vec.y
        res[2] = vec.z
        res[3] = vec.w

    # Dispatch on the number of components of the requested vector type.
    kernels_by_arity = {
        1: kernel_1elem,
        2: kernel_2elem,
        3: kernel_3elem,
        4: kernel_4elem
    }
    host_function = kernels_by_arity[vtype.num_elements]
    return cuda.jit(host_function)
65
+
66
+
67
def make_fancy_creation_kernel(vtype):
    """
    Build a jit-compiled kernel exercising "fancy" vector construction.

    "Fancy" construction means building a vector from an arbitrary mix of
    scalars and smaller vectors, as long as the total number of components
    supplied equals the number of components of the vector being built.

    The kernel builds 1-, 2-, 3- and 4-component vectors of the same base
    type as ``vtype`` and writes every component of every vector, in
    construction order, into the result array.
    """
    base_type = vtype.base_type

    # vtype.name ends in the component count; drop it to get the family
    # prefix, then look up all four widths of that family on the cuda module.
    family = vtype.name[:-1]
    v1 = getattr(cuda, f"{family}1")
    v2 = getattr(cuda, f"{family}2")
    v3 = getattr(cuda, f"{family}3")
    v4 = getattr(cuda, f"{family}4")

    def kernel(res):
        one = base_type(1.0)
        two = base_type(2.0)
        three = base_type(3.0)
        four = base_type(4.0)

        j = 0   # index of the result array

        # Construct a 1-component vector type, possible combination includes:
        # 2C1 = 2 combinations.

        f1_1 = v1(one)   # 1
        f1_2 = v1(f1_1)  # 1

        res[0] = f1_1.x
        res[1] = f1_2.x
        j += 2

        # Construct a 2-component vector type, possible combination includes:
        # 1 + 2C1 * 2 = 5 combinations

        f2_1 = v2(two, three)   # 2 3
        f2_2 = v2(f1_1, three)  # 1 3
        f2_3 = v2(two, f1_1)    # 2 1
        f2_4 = v2(f1_1, f1_1)   # 1 1
        f2_5 = v2(f2_1)         # 2 3

        for vec2 in (f2_1, f2_2, f2_3, f2_4, f2_5):
            res[j] = vec2.x
            res[j + 1] = vec2.y
            j += 2

        # Construct a 3-component vector type, possible combination includes:
        # 1 + 2C1 * 2 + 2^3 = 13 combinations

        f3_1 = v3(f2_1, one)    # 2 3 1
        f3_2 = v3(f2_1, f1_1)   # 2 3 1
        f3_3 = v3(one, f2_1)    # 1 2 3
        f3_4 = v3(f1_1, f2_1)   # 1 2 3

        f3_5 = v3(one, two, three)     # 1 2 3
        f3_6 = v3(f1_1, two, three)    # 1 2 3
        f3_7 = v3(one, f1_1, three)    # 1 1 3
        f3_8 = v3(one, two, f1_1)      # 1 2 1
        f3_9 = v3(f1_1, f1_1, three)   # 1 1 3
        f3_10 = v3(one, f1_1, f1_1)    # 1 1 1
        f3_11 = v3(f1_1, two, f1_1)    # 1 2 1
        f3_12 = v3(f1_1, f1_1, f1_1)   # 1 1 1

        f3_13 = v3(f3_1)               # 2 3 1

        for vec3 in (f3_1, f3_2, f3_3, f3_4, f3_5, f3_6, f3_7, f3_8, f3_9,
                     f3_10, f3_11, f3_12, f3_13):
            res[j] = vec3.x
            res[j + 1] = vec3.y
            res[j + 2] = vec3.z
            j += 3

        # Construct a 4-component vector type, possible combination includes:
        # 1 + (2C1 * 2 + 1) + 3C1 * 2^2 + 2^4 = 34 combinations

        f4_1 = v4(one, two, three, four)      # 1 2 3 4
        f4_2 = v4(f1_1, two, three, four)     # 1 2 3 4
        f4_3 = v4(one, f1_1, three, four)     # 1 1 3 4
        f4_4 = v4(one, two, f1_1, four)       # 1 2 1 4
        f4_5 = v4(one, two, three, f1_1)      # 1 2 3 1
        f4_6 = v4(f1_1, f1_1, three, four)    # 1 1 3 4
        f4_7 = v4(f1_1, two, f1_1, four)      # 1 2 1 4
        f4_8 = v4(f1_1, two, three, f1_1)     # 1 2 3 1
        f4_9 = v4(one, f1_1, f1_1, four)      # 1 1 1 4
        f4_10 = v4(one, f1_1, three, f1_1)    # 1 1 3 1
        f4_11 = v4(one, two, f1_1, f1_1)      # 1 2 1 1
        f4_12 = v4(f1_1, f1_1, f1_1, four)    # 1 1 1 4
        f4_13 = v4(f1_1, f1_1, three, f1_1)   # 1 1 3 1
        f4_14 = v4(f1_1, two, f1_1, f1_1)     # 1 2 1 1
        f4_15 = v4(one, f1_1, f1_1, f1_1)     # 1 1 1 1
        f4_16 = v4(f1_1, f1_1, f1_1, f1_1)    # 1 1 1 1

        f4_17 = v4(f2_1, two, three)    # 2 3 2 3
        f4_18 = v4(f2_1, f1_1, three)   # 2 3 1 3
        f4_19 = v4(f2_1, two, f1_1)     # 2 3 2 1
        f4_20 = v4(f2_1, f1_1, f1_1)    # 2 3 1 1
        f4_21 = v4(one, f2_1, three)    # 1 2 3 3
        f4_22 = v4(f1_1, f2_1, three)   # 1 2 3 3
        f4_23 = v4(one, f2_1, f1_1)     # 1 2 3 1
        f4_24 = v4(f1_1, f2_1, f1_1)    # 1 2 3 1
        f4_25 = v4(one, four, f2_1)     # 1 4 2 3
        f4_26 = v4(f1_1, four, f2_1)    # 1 4 2 3
        f4_27 = v4(one, f1_1, f2_1)     # 1 1 2 3
        f4_28 = v4(f1_1, f1_1, f2_1)    # 1 1 2 3

        f4_29 = v4(f2_1, f2_1)   # 2 3 2 3
        f4_30 = v4(f3_1, four)   # 2 3 1 4
        f4_31 = v4(f3_1, f1_1)   # 2 3 1 1
        f4_32 = v4(four, f3_1)   # 4 2 3 1
        f4_33 = v4(f1_1, f3_1)   # 1 2 3 1

        f4_34 = v4(f4_1)   # 1 2 3 4

        for vec4 in (f4_1, f4_2, f4_3, f4_4, f4_5, f4_6, f4_7, f4_8, f4_9,
                     f4_10, f4_11, f4_12, f4_13, f4_14, f4_15, f4_16, f4_17,
                     f4_18, f4_19, f4_20, f4_21, f4_22, f4_23, f4_24, f4_25,
                     f4_26, f4_27, f4_28, f4_29, f4_30, f4_31, f4_32, f4_33,
                     f4_34):
            res[j] = vec4.x
            res[j + 1] = vec4.y
            res[j + 2] = vec4.z
            res[j + 3] = vec4.w
            j += 4

    return cuda.jit(kernel)
190
+
191
+
192
class TestCudaVectorType(CUDATestCase):
    """Tests for construction and readout of CUDA vector types."""

    def test_basic(self):
        """Check that vector types and aliases are reachable via ``cuda``.

        This is an important sanity check, because the remaining tests
        obtain the vector type objects programmatically rather than by
        attribute access on the ``cuda`` module.
        """
        @cuda.jit("void(float64[:])")
        def kernel(arr):
            v1 = cuda.float64x4(1.0, 3.0, 5.0, 7.0)
            v2 = cuda.short2(10, 11)
            arr[0] = v1.x
            arr[1] = v1.y
            arr[2] = v1.z
            arr[3] = v1.w
            arr[4] = v2.x
            arr[5] = v2.y

        res = np.zeros(6, dtype=np.float64)
        kernel[1, 1](res)
        matches = np.allclose(res, [1.0, 3.0, 5.0, 7.0, 10, 11])
        self.assertTrue(matches)

    def test_creation_readout(self):
        """Each vector type built from scalars reads back 0, 1, 2, ..."""
        for vty in vector_types.values():
            with self.subTest(vty=vty):
                n_components = vty.num_elements
                arr = np.zeros((n_components,))
                kernel = make_kernel(vty)
                kernel[1, 1](arr)
                expected = np.array(range(n_components))
                np.testing.assert_almost_equal(arr, expected)

    def test_fancy_creation_readout(self):
        """Every mixed scalar/vector construction reads back as expected."""
        # Flattened components of every vector constructed by the kernel
        # returned from make_fancy_creation_kernel, in construction order.
        expected = np.array([
            # 1-component vectors
            1,
            1,
            # 2-component vectors
            2, 3,
            1, 3,
            2, 1,
            1, 1,
            2, 3,
            # 3-component vectors
            2, 3, 1,
            2, 3, 1,
            1, 2, 3,
            1, 2, 3,
            1, 2, 3,
            1, 2, 3,
            1, 1, 3,
            1, 2, 1,
            1, 1, 3,
            1, 1, 1,
            1, 2, 1,
            1, 1, 1,
            2, 3, 1,
            # 4-component vectors
            1, 2, 3, 4,
            1, 2, 3, 4,
            1, 1, 3, 4,
            1, 2, 1, 4,
            1, 2, 3, 1,
            1, 1, 3, 4,
            1, 2, 1, 4,
            1, 2, 3, 1,
            1, 1, 1, 4,
            1, 1, 3, 1,
            1, 2, 1, 1,
            1, 1, 1, 4,
            1, 1, 3, 1,
            1, 2, 1, 1,
            1, 1, 1, 1,
            1, 1, 1, 1,
            2, 3, 2, 3,
            2, 3, 1, 3,
            2, 3, 2, 1,
            2, 3, 1, 1,
            1, 2, 3, 3,
            1, 2, 3, 3,
            1, 2, 3, 1,
            1, 2, 3, 1,
            1, 4, 2, 3,
            1, 4, 2, 3,
            1, 1, 2, 3,
            1, 1, 2, 3,
            2, 3, 2, 3,
            2, 3, 1, 4,
            2, 3, 1, 1,
            4, 2, 3, 1,
            1, 2, 3, 1,
            1, 2, 3, 4
        ])

        for vty in vector_types.values():
            with self.subTest(vty=vty):
                kernel = make_fancy_creation_kernel(vty)
                arr = np.zeros(expected.shape)
                kernel[1, 1](arr)
                np.testing.assert_almost_equal(arr, expected)

    def test_vector_type_alias(self):
        """Check that each alias on ``cuda`` is the same object as its name.

        `test_fancy_creation_readout` only tests vector types looked up by
        name. This test makes sure that an object looked up through any of
        its aliases is the very same object, so construction through an
        alias behaves identically.
        """
        for vty in vector_types.values():
            for alias in vty.user_facing_object.aliases:
                with self.subTest(vty=vty.name, alias=alias):
                    by_name = getattr(cuda, vty.name)
                    by_alias = getattr(cuda, alias)
                    self.assertEqual(id(by_name), id(by_alias))
@@ -0,0 +1,283 @@
1
+ import numpy as np
2
+
3
+ from collections import namedtuple
4
+ from itertools import product
5
+ from numba import vectorize
6
+ from numba import cuda, int32, float32, float64
7
+ from numba.cuda.cudadrv.driver import CudaAPIError, driver
8
+ from numba.cuda.testing import skip_on_cudasim
9
+ from numba.cuda.testing import CUDATestCase
10
+ import unittest
11
+
12
+
13
+ # Signatures to test with - these are all homogeneous in dtype, so the output
14
+ # dtype should match the input dtype - the output should not have been cast
15
+ # upwards, as reported in #8400: https://github.com/numba/numba/issues/8400
16
+ signatures = [int32(int32, int32),
17
+ float32(float32, float32),
18
+ float64(float64, float64)]
19
+
20
+ # The order here is chosen such that each subsequent dtype might have been
21
+ # cast to a previously-used dtype. This is unlikely to be an issue for CUDA,
22
+ # but there might be future circumstances in which it becomes relevant, perhaps
23
+ # if it supported Dynamic UFuncs, and we want to ensure that an implementation
24
+ # for the given dtype is used rather than casting the input upwards.
25
+ dtypes = (np.float64, np.float32, np.int32)
26
+
27
+ # NumPy ndarray orders
28
+ orders = ('C', 'F')
29
+
30
+ # Input sizes corresponding to operations:
31
+ # - Less than one warp,
32
+ # - Less than one block,
33
+ # - Greater than one block (i.e. many blocks)
34
+ input_sizes = (8, 100, 2 ** 10 + 1)
35
+
36
+
37
@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestCUDAVectorize(CUDATestCase):
    """Tests for ``@vectorize(..., target='cuda')`` ufuncs."""

    # Presumably chosen as an odd number unlikely to coincide with the total
    # thread count, and large enough to ensure a significant number of blocks
    # are used.
    N = 1000001

    def test_scalar(self):
        """Calling the ufunc on two Python scalars returns their sum."""

        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        a = 1.2
        b = 2.3
        c = vector_add(a, b)
        self.assertEqual(c, a + b)

    def test_1d(self):
        """1D host arrays are added correctly and keep their dtype."""

        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        for ty in dtypes:
            data = np.array(np.random.random(self.N), dtype=ty)
            expected = np.add(data, data)
            actual = vector_add(data, data)
            np.testing.assert_allclose(expected, actual)
            self.assertEqual(actual.dtype, ty)

    def test_1d_async(self):
        """Same as test_1d, with device arrays and an explicit stream."""

        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        stream = cuda.stream()

        for ty in dtypes:
            data = np.array(np.random.random(self.N), dtype=ty)
            device_data = cuda.to_device(data, stream)

            dresult = vector_add(device_data, device_data, stream=stream)
            actual = dresult.copy_to_host()

            expected = np.add(data, data)

            np.testing.assert_allclose(expected, actual)
            self.assertEqual(actual.dtype, ty)

    def test_nd(self):
        """Arrays of 1 to 7 dimensions, in C and F order, add correctly."""

        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        for nd, dtype, order in product(range(1, 8), dtypes, orders):
            shape = (4,) * nd
            data = np.random.random(shape).astype(dtype)
            data2 = np.array(data.T, order=order)

            expected = data + data2
            actual = vector_add(data, data2)
            np.testing.assert_allclose(expected, actual)
            self.assertEqual(actual.dtype, dtype)

    def test_output_arg(self):
        """The ``out=`` keyword receives the result."""
        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        A = np.arange(10, dtype=np.float32)
        B = np.arange(10, dtype=np.float32)

        expected = A + B
        actual = np.empty_like(A)
        vector_add(A, B, out=actual)

        np.testing.assert_allclose(expected, actual)
        self.assertEqual(expected.dtype, actual.dtype)

    def test_reduce(self):
        """``ufunc.reduce`` matches ``np.add.reduce`` for each input size."""
        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        dtype = np.int32

        for n in input_sizes:
            x = np.arange(n, dtype=dtype)
            expected = np.add.reduce(x)
            actual = vector_add.reduce(x)
            np.testing.assert_allclose(expected, actual)
            # np.add.reduce is special-cased to return an int64 for any int
            # arguments, so we can't compare against its returned dtype when
            # we're checking the general reduce machinery (which just happens
            # to be using addition). Instead, compare against the input dtype.
            self.assertEqual(dtype, actual.dtype)

    def test_reduce_async(self):
        """Same as test_reduce, with device arrays and an explicit stream."""

        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        stream = cuda.stream()
        dtype = np.int32

        for n in input_sizes:
            x = np.arange(n, dtype=dtype)
            expected = np.add.reduce(x)
            dx = cuda.to_device(x, stream)
            actual = vector_add.reduce(dx, stream=stream)
            np.testing.assert_allclose(expected, actual)
            # Compare against the input dtype as in test_reduce().
            self.assertEqual(dtype, actual.dtype)

    def test_manual_transfer(self):
        """A host array and a device array can be mixed as inputs."""
        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        n = 10
        x = np.arange(n, dtype=np.int32)
        dx = cuda.to_device(x)
        expected = x + x
        actual = vector_add(x, dx).copy_to_host()
        np.testing.assert_equal(expected, actual)
        self.assertEqual(expected.dtype, actual.dtype)

    def test_ufunc_output_2d(self):
        """A 2D device array can be both an input and the ``out=`` target."""
        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        n = 10
        x = np.arange(n, dtype=np.int32).reshape(2, 5)
        dx = cuda.to_device(x)
        vector_add(dx, dx, out=dx)

        expected = x + x
        actual = dx.copy_to_host()
        np.testing.assert_equal(expected, actual)
        self.assertEqual(expected.dtype, actual.dtype)

    def check_tuple_arg(self, a, b):
        """Helper: the ufunc of ``a`` and ``b`` equals their array sum."""
        @vectorize(signatures, target='cuda')
        def vector_add(a, b):
            return a + b

        r = vector_add(a, b)
        np.testing.assert_equal(np.asarray(a) + np.asarray(b), r)

    def test_tuple_arg(self):
        a = (1.0, 2.0, 3.0)
        b = (4.0, 5.0, 6.0)
        self.check_tuple_arg(a, b)

    def test_namedtuple_arg(self):
        Point = namedtuple('Point', ('x', 'y', 'z'))
        a = Point(x=1.0, y=2.0, z=3.0)
        b = Point(x=4.0, y=5.0, z=6.0)
        self.check_tuple_arg(a, b)

    def test_tuple_of_array_arg(self):
        arr = np.arange(10, dtype=np.int32)
        a = (arr, arr + 1)
        b = (arr + 2, arr + 2)
        self.check_tuple_arg(a, b)

    def test_tuple_of_namedtuple_arg(self):
        Point = namedtuple('Point', ('x', 'y', 'z'))
        a = (Point(x=1.0, y=2.0, z=3.0), Point(x=1.5, y=2.5, z=3.5))
        b = (Point(x=4.0, y=5.0, z=6.0), Point(x=4.5, y=5.5, z=6.5))
        self.check_tuple_arg(a, b)

    def test_namedtuple_of_array_arg(self):
        xs1 = np.arange(10, dtype=np.int32)
        ys1 = xs1 + 2
        xs2 = np.arange(10, dtype=np.int32) * 2
        ys2 = xs2 + 1
        Points = namedtuple('Points', ('xs', 'ys'))
        a = Points(xs=xs1, ys=ys1)
        b = Points(xs=xs2, ys=ys2)
        self.check_tuple_arg(a, b)

    def test_name_attribute(self):
        """The ufunc keeps the ``__name__`` of the decorated function."""
        @vectorize('f8(f8)', target='cuda')
        def bar(x):
            return x ** 2

        self.assertEqual(bar.__name__, 'bar')

    def test_no_transfer_for_device_data(self):
        """Calling a ufunc on device data must not copy host <-> device."""
        # Initialize test data on the device prior to banning host <-> device
        # transfer

        noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
        noise = cuda.to_device(noise)

        # A mock of a CUDA function that always raises a CudaAPIError

        def raising_transfer(*args, **kwargs):
            raise CudaAPIError(999, 'Transfer not allowed')

        # Use the mock for transfers between the host and device

        old_HtoD = getattr(driver, 'cuMemcpyHtoD', None)
        old_DtoH = getattr(driver, 'cuMemcpyDtoH', None)

        setattr(driver, 'cuMemcpyHtoD', raising_transfer)
        setattr(driver, 'cuMemcpyDtoH', raising_transfer)

        # Everything that runs while the mocks are installed lives inside
        # the try block, so a failing sanity check cannot leave the driver
        # patched for the tests that run afterwards.
        try:
            # Ensure that the mock functions are working as expected

            with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
                noise.copy_to_host()

            with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
                cuda.to_device([1])

            # Check that defining and calling a ufunc with data on the device
            # induces no transfers

            @vectorize(['float32(float32)'], target='cuda')
            def func(noise):
                return noise + 1.0

            func(noise)
        finally:
            # Replace our mocks with the original implementations. If there was
            # no original implementation, simply remove ours.

            if old_HtoD is not None:
                setattr(driver, 'cuMemcpyHtoD', old_HtoD)
            else:
                del driver.cuMemcpyHtoD
            if old_DtoH is not None:
                setattr(driver, 'cuMemcpyDtoH', old_DtoH)
            else:
                del driver.cuMemcpyDtoH
280
+
281
+
282
+ if __name__ == '__main__':
283
+ unittest.main()
@@ -0,0 +1,20 @@
1
+ import numpy as np
2
+ from numba import vectorize
3
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
4
+ import unittest
5
+
6
+
7
@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestVectorizeComplex(CUDATestCase):
    """Checks a CUDA-target ufunc operating on complex128 values."""

    def test_vectorize_complex(self):
        """``a * a + 1`` on the device matches the NumPy result."""
        @vectorize(['complex128(complex128)'], target='cuda')
        def vcomp(a):
            return a * a + 1.

        values = np.arange(5, dtype=np.complex128)
        device_result = vcomp(values)
        host_result = values * values + 1.
        self.assertTrue(np.allclose(host_result, device_result))
17
+
18
+
19
+ if __name__ == '__main__':
20
+ unittest.main()
@@ -0,0 +1,69 @@
1
+ import numpy as np
2
+
3
+ from numba import vectorize, cuda
4
+ from numba.tests.npyufunc.test_vectorize_decor import BaseVectorizeDecor, \
5
+ BaseVectorizeNopythonArg, BaseVectorizeUnrecognizedArg
6
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
7
+ import unittest
8
+
9
+
10
@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
    """
    Re-runs the tests inherited from BaseVectorizeDecor against the CUDA
    target, selected through the ``target`` class attribute.
    """
    target = 'cuda'
16
+
17
+
18
@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestGPUVectorizeBroadcast(CUDATestCase):
    """Checks that CUDA ufuncs broadcast their inputs like NumPy does."""

    def test_broadcast(self):
        """Broadcast a (100, 3, 1) host array against its (1, 3, 100) view."""
        lhs = np.random.randn(100, 3, 1)
        rhs = lhs.transpose(2, 1, 0)

        @vectorize(['float64(float64,float64)'], target='cuda')
        def fngpu(a, b):
            return a - b

        # Reference result computed on the host with NumPy broadcasting.
        expect = lhs - rhs
        got = fngpu(lhs, rhs)
        np.testing.assert_almost_equal(expect, got)

    def test_device_broadcast(self):
        """
        Same test as .test_broadcast() but with device arrays as inputs
        """
        lhs = np.random.randn(100, 3, 1)
        rhs = lhs.transpose(2, 1, 0)

        @vectorize(['float64(float64,float64)'], target='cuda')
        def fngpu(a, b):
            return a - b

        # Reference result computed on the host with NumPy broadcasting.
        expect = lhs - rhs
        got = fngpu(cuda.to_device(lhs), cuda.to_device(rhs))
        np.testing.assert_almost_equal(expect, got.copy_to_host())
53
+
54
+
55
@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestVectorizeNopythonArg(BaseVectorizeNopythonArg, CUDATestCase):
    """Runs the inherited ``nopython`` keyword check for the CUDA target."""

    def test_target_cuda_nopython(self):
        # Warning text handed to the inherited helper together with the
        # target name.
        expected_warnings = ["nopython kwarg for cuda target is redundant"]
        self._test_target_nopython('cuda', expected_warnings)
60
+
61
+
62
@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestVectorizeUnrecognizedArg(BaseVectorizeUnrecognizedArg, CUDATestCase):
    """Runs the inherited unrecognized-argument check for the CUDA target."""

    def test_target_cuda_unrecognized_arg(self):
        # Delegates to the helper inherited from the base class.
        self._test_target_unrecognized_arg('cuda')
66
+
67
+
68
+ if __name__ == '__main__':
69
+ unittest.main()