numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudapy/test_overload.py
@@ -0,0 +1,335 @@
1
+ from numba import cuda, njit, types, version_info
2
+ from numba.core.errors import TypingError
3
+ from numba.core.extending import overload, overload_attribute
4
+ from numba.core.typing.typeof import typeof
5
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim, unittest
6
+ import numpy as np
7
+
8
+
9
+ # Dummy function definitions to overload
10
+
11
def generic_func_1():
    """Stub with no behaviour; exists only as a target for @overload."""


def cuda_func_1():
    """Stub with no behaviour; exists only as a target for @overload."""


def generic_func_2():
    """Stub with no behaviour; exists only as a target for @overload."""


def cuda_func_2():
    """Stub with no behaviour; exists only as a target for @overload."""


def generic_calls_generic():
    """Stub with no behaviour; exists only as a target for @overload."""


def generic_calls_cuda():
    """Stub with no behaviour; exists only as a target for @overload."""


def cuda_calls_generic():
    """Stub with no behaviour; exists only as a target for @overload."""


def cuda_calls_cuda():
    """Stub with no behaviour; exists only as a target for @overload."""


def target_overloaded():
    """Stub with no behaviour; exists only as a target for @overload."""


def generic_calls_target_overloaded():
    """Stub with no behaviour; exists only as a target for @overload."""


def cuda_calls_target_overloaded():
    """Stub with no behaviour; exists only as a target for @overload."""


def target_overloaded_calls_target_overloaded():
    """Stub with no behaviour; exists only as a target for @overload."""
57
+
58
+
59
+ # To recognise which functions are resolved for a call, we identify each with a
60
+ # prime number. Each function called multiplies a value by its prime (starting
61
+ # with the value 1), and we can check that the result is as expected based on
62
+ # the final value after all multiplications.
63
+
64
# Primes for overloads that only multiply the value
GENERIC_FUNCTION_1 = 2
CUDA_FUNCTION_1 = 3
GENERIC_FUNCTION_2 = 5
CUDA_FUNCTION_2 = 7
# Primes for overloads that multiply and then call another overloaded function
GENERIC_CALLS_GENERIC = 11
GENERIC_CALLS_CUDA = 13
CUDA_CALLS_GENERIC = 17
CUDA_CALLS_CUDA = 19
# Primes for the function that has both a generic and a CUDA overload
GENERIC_TARGET_OL = 23
CUDA_TARGET_OL = 29
# Primes for single-target callers of the doubly-overloaded function
GENERIC_CALLS_TARGET_OL = 31
CUDA_CALLS_TARGET_OL = 37
# Primes for the doubly-overloaded caller of the doubly-overloaded function
GENERIC_TARGET_OL_CALLS_TARGET_OL = 41
CUDA_TARGET_OL_CALLS_TARGET_OL = 43
78
+
79
+
80
+ # Overload implementations
81
+
82
@overload(generic_func_1, target='generic')
def ol_generic_func_1(x):
    """Generic-target overload of generic_func_1."""
    def scale_by_prime(x):
        # Mark the call by multiplying in this overload's prime
        x[0] *= GENERIC_FUNCTION_1

    return scale_by_prime
87
+
88
+
89
@overload(cuda_func_1, target='cuda')
def ol_cuda_func_1(x):
    """CUDA-target overload of cuda_func_1."""
    def scale_by_prime(x):
        # Mark the call by multiplying in this overload's prime
        x[0] *= CUDA_FUNCTION_1

    return scale_by_prime
94
+
95
+
96
@overload(generic_func_2, target='generic')
def ol_generic_func_2(x):
    """Generic-target overload of generic_func_2."""
    def scale_by_prime(x):
        # Mark the call by multiplying in this overload's prime
        x[0] *= GENERIC_FUNCTION_2

    return scale_by_prime
101
+
102
+
103
@overload(cuda_func_2, target='cuda')
def ol_cuda_func(x):
    """CUDA-target overload of cuda_func_2."""
    def scale_by_prime(x):
        # Mark the call by multiplying in this overload's prime
        x[0] *= CUDA_FUNCTION_2

    return scale_by_prime
108
+
109
+
110
@overload(generic_calls_generic, target='generic')
def ol_generic_calls_generic(x):
    """Generic-target overload that goes on to call generic_func_1."""
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= GENERIC_CALLS_GENERIC
        generic_func_1(x)

    return scale_then_call
116
+
117
+
118
@overload(generic_calls_cuda, target='generic')
def ol_generic_calls_cuda(x):
    """Generic-target overload that goes on to call cuda_func_1."""
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= GENERIC_CALLS_CUDA
        cuda_func_1(x)

    return scale_then_call
124
+
125
+
126
@overload(cuda_calls_generic, target='cuda')
def ol_cuda_calls_generic(x):
    """CUDA-target overload that goes on to call generic_func_1."""
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= CUDA_CALLS_GENERIC
        generic_func_1(x)

    return scale_then_call
132
+
133
+
134
@overload(cuda_calls_cuda, target='cuda')
def ol_cuda_calls_cuda(x):
    """CUDA-target overload that goes on to call cuda_func_1."""
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= CUDA_CALLS_CUDA
        cuda_func_1(x)

    return scale_then_call
140
+
141
+
142
@overload(target_overloaded, target='generic')
def ol_target_overloaded_generic(x):
    """Generic-target overload of target_overloaded."""
    def scale_by_prime(x):
        # Mark the call by multiplying in this overload's prime
        x[0] *= GENERIC_TARGET_OL

    return scale_by_prime
147
+
148
+
149
@overload(target_overloaded, target='cuda')
def ol_target_overloaded_cuda(x):
    """CUDA-target overload of target_overloaded."""
    def scale_by_prime(x):
        # Mark the call by multiplying in this overload's prime
        x[0] *= CUDA_TARGET_OL

    return scale_by_prime
154
+
155
+
156
@overload(generic_calls_target_overloaded, target='generic')
def ol_generic_calls_target_overloaded(x):
    """Generic-target overload that goes on to call target_overloaded."""
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= GENERIC_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_call
162
+
163
+
164
@overload(cuda_calls_target_overloaded, target='cuda')
def ol_cuda_calls_target_overloaded(x):
    """CUDA-target overload that goes on to call target_overloaded."""
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= CUDA_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_call
170
+
171
+
172
@overload(target_overloaded_calls_target_overloaded, target='generic')
def ol_generic_calls_target_overloaded_generic(x):
    """Generic-target overload of target_overloaded_calls_target_overloaded.

    The implementation goes on to call target_overloaded.
    """
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= GENERIC_TARGET_OL_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_call
178
+
179
+
180
@overload(target_overloaded_calls_target_overloaded, target='cuda')
def ol_generic_calls_target_overloaded_cuda(x):
    """CUDA-target overload of target_overloaded_calls_target_overloaded.

    The implementation goes on to call target_overloaded.
    """
    def scale_then_call(x):
        # Record this overload first, then let the callee record itself
        x[0] *= CUDA_TARGET_OL_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_call
186
+
187
+
188
@skip_on_cudasim('Overloading not supported in cudasim')
class TestOverload(CUDATestCase):
    """Check which overload implementations are selected per target.

    Each overload multiplies element 0 of its argument by its own prime, so
    the product left in the array identifies exactly which implementations
    ran.
    """

    def check_overload(self, kernel, expected):
        """Run ``kernel`` as a single-thread CUDA kernel and check the result.

        The kernel receives a one-element int32 array initialised to 1;
        element 0 must equal ``expected`` afterwards.
        """
        product = np.ones(1, dtype=np.int32)
        compiled = cuda.jit(kernel)
        compiled[1, 1](product)
        self.assertEqual(product[0], expected)

    def check_overload_cpu(self, kernel, expected):
        """Run ``kernel`` through ``njit`` on the CPU and check the result.

        The function receives a one-element int32 array initialised to 1;
        element 0 must equal ``expected`` afterwards.
        """
        product = np.ones(1, dtype=np.int32)
        compiled = njit(kernel)
        compiled(product)
        self.assertEqual(product[0], expected)

    def test_generic(self):
        def kernel(x):
            generic_func_1(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1)

    def test_cuda(self):
        def kernel(x):
            cuda_func_1(x)

        self.check_overload(kernel, CUDA_FUNCTION_1)

    def test_generic_and_cuda(self):
        def kernel(x):
            generic_func_1(x)
            cuda_func_1(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1 * CUDA_FUNCTION_1)

    def test_call_two_generic_calls(self):
        def kernel(x):
            generic_func_1(x)
            generic_func_2(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1 * GENERIC_FUNCTION_2)

    def test_call_two_cuda_calls(self):
        def kernel(x):
            cuda_func_1(x)
            cuda_func_2(x)

        self.check_overload(kernel, CUDA_FUNCTION_1 * CUDA_FUNCTION_2)

    def test_generic_calls_generic(self):
        def kernel(x):
            generic_calls_generic(x)

        self.check_overload(kernel,
                            GENERIC_CALLS_GENERIC * GENERIC_FUNCTION_1)

    def test_generic_calls_cuda(self):
        def kernel(x):
            generic_calls_cuda(x)

        self.check_overload(kernel, GENERIC_CALLS_CUDA * CUDA_FUNCTION_1)

    def test_cuda_calls_generic(self):
        def kernel(x):
            cuda_calls_generic(x)

        self.check_overload(kernel, CUDA_CALLS_GENERIC * GENERIC_FUNCTION_1)

    def test_cuda_calls_cuda(self):
        def kernel(x):
            cuda_calls_cuda(x)

        self.check_overload(kernel, CUDA_CALLS_CUDA * CUDA_FUNCTION_1)

    def test_call_target_overloaded(self):
        def kernel(x):
            target_overloaded(x)

        self.check_overload(kernel, CUDA_TARGET_OL)

    def test_generic_calls_target_overloaded(self):
        def kernel(x):
            generic_calls_target_overloaded(x)

        self.check_overload(kernel, GENERIC_CALLS_TARGET_OL * CUDA_TARGET_OL)

    def test_cuda_calls_target_overloaded(self):
        def kernel(x):
            cuda_calls_target_overloaded(x)

        self.check_overload(kernel, CUDA_CALLS_TARGET_OL * CUDA_TARGET_OL)

    def test_target_overloaded_calls_target_overloaded(self):
        def kernel(x):
            target_overloaded_calls_target_overloaded(x)

        # Check the CUDA overloads are used on CUDA
        on_cuda = CUDA_TARGET_OL_CALLS_TARGET_OL * CUDA_TARGET_OL
        self.check_overload(kernel, on_cuda)

        # Also check that the CPU overloads are used on the CPU
        on_cpu = GENERIC_TARGET_OL_CALLS_TARGET_OL * GENERIC_TARGET_OL
        self.check_overload_cpu(kernel, on_cpu)

    def test_overload_attribute_target(self):
        """A CUDA-only overloaded attribute is rejected on CPU, usable on CUDA.

        NOTE(review): ``make_dummy_type`` comes from the test base class and
        is not visible here; presumably it returns a Python class and its
        Numba type - confirm.
        """
        MyDummy, MyDummyType = self.make_dummy_type()
        mydummy_type = typeof(MyDummy())

        @overload_attribute(MyDummyType, 'cuda_only', target='cuda')
        def ov_dummy_cuda_attr(obj):
            def imp(obj):
                return 42

            return imp

        # Ensure that we cannot use the CUDA target-specific attribute on the
        # CPU, and that an appropriate typing error is raised

        # A different error is produced prior to version 0.60
        # (the fixes in #9454 improved the message)
        # https://github.com/numba/numba/pull/9454
        before_0_60 = version_info[:2] < (0, 60)
        msg = ('resolving type of attribute "cuda_only" of "x"'
               if before_0_60 else "Unknown attribute 'cuda_only'")

        with self.assertRaisesRegex(TypingError, msg):
            @njit(types.int64(mydummy_type))
            def illegal_target_attr_use(x):
                return x.cuda_only

        # Ensure that the CUDA target-specific attribute is usable and works
        # correctly when the target is CUDA - note eager compilation via
        # signature
        @cuda.jit(types.void(types.int64[::1], mydummy_type))
        def cuda_target_attr_use(res, dummy):
            res[0] = dummy.cuda_only
332
+
333
+
334
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_powi.py
@@ -0,0 +1,124 @@
1
+ import math
2
+ import numpy as np
3
+ from numba import cuda, float64, int8, int32, void
4
+ from numba.cuda.testing import unittest, CUDATestCase
5
+
6
+
7
def cu_mat_power(A, power, power_A):
    """Kernel body: power_A[y, x] = math.pow(A[y, x], int32(power)).

    Threads whose 2D grid position falls outside power_A do nothing.
    """
    y, x = cuda.grid(2)
    m, n = power_A.shape

    # Only threads that map onto an element of the output do any work
    if y < m and x < n:
        power_A[y, x] = math.pow(A[y, x], int32(power))
15
+
16
+
17
def cu_mat_power_binop(A, power, power_A):
    """Kernel body: power_A[y, x] = A[y, x] ** power.

    Threads whose 2D grid position falls outside power_A do nothing.
    """
    y, x = cuda.grid(2)
    m, n = power_A.shape

    # Only threads that map onto an element of the output do any work
    if y < m and x < n:
        power_A[y, x] = A[y, x] ** power
25
+
26
+
27
def vec_pow(r, x, y):
    """Kernel body: r[i] = pow(x[i], y[i]) for each thread index i < len(r)."""
    i = cuda.grid(1)

    # Threads past the end of the output have nothing to do
    if i >= len(r):
        return

    r[i] = pow(x[i], y[i])
32
+
33
+
34
def vec_pow_binop(r, x, y):
    """Kernel body: r[i] = x[i] ** y[i] for each thread index i < len(r)."""
    i = cuda.grid(1)

    # Threads past the end of the output have nothing to do
    if i >= len(r):
        return

    r[i] = x[i] ** y[i]
39
+
40
+
41
def vec_pow_inplace_binop(r, x):
    """Kernel body: r[i] **= x[i] for each thread index i < len(r)."""
    i = cuda.grid(1)

    # Threads past the end of the array have nothing to do
    if i >= len(r):
        return

    r[i] **= x[i]
46
+
47
+
48
def random_complex(N):
    """Return a one-element complex array drawn with a fixed seed.

    The generator is reseeded with 123 on every call, so repeated calls
    return the same value.

    NOTE(review): ``N`` is accepted but not used - the result always has a
    single element. Callers in this file pass N=32 and launch N threads;
    confirm whether N samples were intended (the callers' tolerances may
    need revisiting if this is changed).
    """
    np.random.seed(123)
    real_part = np.random.random(1)
    imag_part = np.random.random(1)
    return real_part + imag_part * 1j
51
+
52
+
53
class TestCudaPowi(CUDATestCase):
    """Tests of power operations inside CUDA kernels.

    Covers float64 raised to an int8 power (via ``math.pow`` and ``**``) and
    complex powers (via ``pow``, ``**`` and ``**=``).
    """

    def _check_powi(self, pyfunc):
        """Compile ``pyfunc`` as a float64/int8 power kernel and verify it.

        The kernel is launched with one block shaped like the 2x5 input and
        its output is compared with NumPy's ``A ** power``.
        """
        dec = cuda.jit(void(float64[:, :], int8, float64[:, :]))
        kernel = dec(pyfunc)

        power = 2
        A = np.arange(10, dtype=np.float64).reshape(2, 5)
        Aout = np.empty_like(A)
        kernel[1, A.shape](A, power, Aout)
        # Same tolerances as the np.allclose defaults, but a failure reports
        # the mismatching elements instead of a bare "False is not true".
        np.testing.assert_allclose(Aout, A ** power, rtol=1e-5, atol=1e-8)

    def test_powi(self):
        self._check_powi(cu_mat_power)

    def test_powi_binop(self):
        self._check_powi(cu_mat_power_binop)

    # Relative tolerance kwarg is provided because 1.0e-7 (the default for
    # assert_allclose) is a bit tight for single precision.
    def _test_cpow(self, dtype, func, rtol=1.0e-7):
        """Check the complex power kernel ``func`` against NumPy for ``dtype``.

        ``func`` is compiled with ``cuda.jit`` and run on random inputs and
        then on a pair of special-case inputs. Because ``random_complex``
        reseeds on every call, ``x`` and ``y`` hold the same values.
        """
        N = 32
        x = random_complex(N).astype(dtype)
        y = random_complex(N).astype(dtype)
        r = np.zeros_like(x)

        cfunc = cuda.jit(func)
        cfunc[1, N](r, x, y)
        np.testing.assert_allclose(r, x ** y, rtol=rtol)

        # Checks special cases
        x = np.asarray([0.0j, 1.0j], dtype=dtype)
        y = np.asarray([0.0j, 1.0], dtype=dtype)
        r = np.zeros_like(x)

        cfunc[1, 2](r, x, y)
        np.testing.assert_allclose(r, x ** y, rtol=rtol)

    def test_cpow_complex64_pow(self):
        self._test_cpow(np.complex64, vec_pow, rtol=3.0e-7)

    def test_cpow_complex64_binop(self):
        self._test_cpow(np.complex64, vec_pow_binop, rtol=3.0e-7)

    def test_cpow_complex128_pow(self):
        self._test_cpow(np.complex128, vec_pow)

    def test_cpow_complex128_binop(self):
        self._test_cpow(np.complex128, vec_pow_binop)

    def _test_cpow_inplace_binop(self, dtype, rtol=1.0e-7):
        """Check the in-place complex power kernel against NumPy.

        The expected value is computed on the host before the kernel
        overwrites ``x`` in place.
        """
        N = 32
        x = random_complex(N).astype(dtype)
        y = random_complex(N).astype(dtype)
        r = x ** y

        cfunc = cuda.jit(vec_pow_inplace_binop)
        cfunc[1, N](x, y)
        np.testing.assert_allclose(x, r, rtol=rtol)

    def test_cpow_complex64_inplace_binop(self):
        self._test_cpow_inplace_binop(np.complex64, rtol=3.0e-7)

    def test_cpow_complex128_inplace_binop(self):
        self._test_cpow_inplace_binop(np.complex128, rtol=3.0e-7)
121
+
122
+
123
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_print.py
@@ -0,0 +1,128 @@
1
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
2
+ import subprocess
3
+ import sys
4
+ import unittest
5
+
6
+
7
# Each *_usecase string below is a complete script that the tests run in a
# subprocess with `python -c`, capturing what the kernel prints.

# Launches 2 blocks of 3 threads; every thread prints "<index> 999" and "-42".
cuhello_usecase = """\
from numba import cuda

@cuda.jit
def cuhello():
    i = cuda.grid(1)
    print(i, 999)
    print(-42)

cuhello[2, 3]()
cuda.synchronize()
"""


# Single thread printing a mix of integers and a float.
printfloat_usecase = """\
from numba import cuda

@cuda.jit
def printfloat():
    i = cuda.grid(1)
    print(i, 23, 34.75, 321)

printfloat[1, 1]()
cuda.synchronize()
"""


# One block of 3 threads, each printing its index, a string and an integer.
printstring_usecase = """\
from numba import cuda

@cuda.jit
def printstring():
    i = cuda.grid(1)
    print(i, "hop!", 999)

printstring[1, 3]()
cuda.synchronize()
"""

# Single thread calling print() with no arguments.
printempty_usecase = """\
from numba import cuda

@cuda.jit
def printempty():
    print()

printempty[1, 1]()
cuda.synchronize()
"""


# Single thread passing 33 items to one print() call.
print_too_many_usecase = """\
from numba import cuda
import numpy as np

@cuda.jit
def print_too_many(r):
    print(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9], r[10],
          r[11], r[12], r[13], r[14], r[15], r[16], r[17], r[18], r[19], r[20],
          r[21], r[22], r[23], r[24], r[25], r[26], r[27], r[28], r[29], r[30],
          r[31], r[32])

print_too_many[1, 1](np.arange(33))
cuda.synchronize()
"""
72
+
73
+
74
class TestPrint(CUDATestCase):
    """Tests of print() inside CUDA kernels, run via subprocess scripts."""

    # Note that in these tests we generally strip the output to avoid dealing
    # with platform-specific line ending issues, e.g. '\r\n' vs '\n' etc.

    def run_code(self, code):
        """Run ``code`` with ``python -c`` in a subprocess.

        Returns a ``(stdout, stderr)`` tuple of decoded text. The subprocess
        is given 60 seconds, and a non-zero exit status raises
        (``check=True``).
        """
        completed = subprocess.run([sys.executable, "-c", code], timeout=60,
                                   capture_output=True, check=True)
        return completed.stdout.decode(), completed.stderr.decode()

    def test_cuhello(self):
        stdout, _ = self.run_code(cuhello_usecase)
        # The output of GPU threads is intermingled, but each print()
        # call is still atomic, so compare the lines in sorted order
        seen = sorted(line.strip() for line in stdout.splitlines())
        wanted = ['-42'] * 6 + ['%d 999' % i for i in range(6)]
        self.assertEqual(seen, wanted)

    def test_printfloat(self):
        stdout, _ = self.run_code(printfloat_usecase)
        # CUDA and the simulator use different formats for float formatting
        acceptable = ["0 23 34.750000 321", "0 23 34.75 321"]
        self.assertIn(stdout.strip(), acceptable)

    def test_printempty(self):
        stdout, _ = self.run_code(printempty_usecase)
        self.assertEqual(stdout.strip(), "")

    def test_string(self):
        stdout, _ = self.run_code(printstring_usecase)
        seen = sorted(line.strip() for line in stdout.splitlines(True))
        wanted = ['%d hop! 999' % i for i in range(3)]
        self.assertEqual(seen, wanted)

    @skip_on_cudasim('cudasim can print unlimited output')
    def test_too_many_args(self):
        # Tests that we emit the format string and warn when there are more
        # than 32 arguments, in common with CUDA C/C++ printf - this is due to
        # a limitation in CUDA vprintf, see:
        # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#limitations

        stdout, stderr = self.run_code(print_too_many_usecase)

        # Check that the format string was printed instead of formatted garbage
        raw_format = ' '.join(['%lld'] * 33)
        self.assertIn(raw_format, stdout)

        # Check for the expected warning about formatting more than 32 items
        warn_msg = ('CUDA print() cannot print more than 32 items. The raw '
                    'format string will be emitted by the kernel instead.')
        self.assertIn(warn_msg, stderr)
125
+
126
+
127
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py
@@ -0,0 +1,33 @@
1
+ import numpy as np
2
+ from numba import cuda, float32, int32, void
3
+ from numba.cuda.testing import unittest, CUDATestCase
4
+
5
+
6
class TestCudaPy2Div(CUDATestCase):
    """Regression test for float32 division inside a CUDA kernel."""

    def test_py2_div_issue(self):
        """Multiplying or dividing by 1.0 must leave the float32 value as is.

        Each thread writes the same value to ``y``, that value times 1.0 to
        ``yA`` and that value divided by 1.0 to ``yB``; all three arrays must
        match exactly.
        """
        @cuda.jit(void(float32[:], float32[:], float32[:], int32))
        def preCalc(y, yA, yB, numDataPoints):
            i = cuda.grid(1)
            # k is unused, but may be part of the trigger for the bug this
            # tests for.
            k = i % numDataPoints  # noqa: F841

            ans = float32(1.001 * float32(i))

            y[i] = ans
            yA[i] = ans * 1.0
            yB[i] = ans / 1.0

        numDataPoints = 15

        y = np.zeros(numDataPoints, dtype=np.float32)
        yA = np.zeros(numDataPoints, dtype=np.float32)
        yB = np.zeros(numDataPoints, dtype=np.float32)
        # One thread per element: the kernel has no bounds check, so the
        # block size is tied to the array length rather than repeated as a
        # literal.
        preCalc[1, numDataPoints](y, yA, yB, numDataPoints)

        # assert_array_equal reports the differing elements on failure
        np.testing.assert_array_equal(y, yA)
        np.testing.assert_array_equal(y, yB)
30
+
31
+
32
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()