numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,187 @@
1
+ import numpy as np
2
+ from numba.core import types
3
+ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
4
+ from numba import cuda
5
+ from numba.cuda import libdevice, compile_ptx
6
+ from numba.cuda.libdevicefuncs import functions, create_signature
7
+
8
+
9
def use_sincos(s, c, x):
    """Kernel body: store the sincos results for ``x[i]`` in ``s[i]``/``c[i]``.

    The first element returned by ``libdevice.sincos`` goes to ``s`` and the
    second to ``c``. Threads whose grid index falls outside ``x`` do nothing.
    """
    tid = cuda.grid(1)
    if tid >= len(x):
        return

    sine, cosine = libdevice.sincos(x[tid])
    s[tid] = sine
    c[tid] = cosine
16
+
17
+
18
def use_frexp(frac, exp, x):
    """Kernel body: store the frexp results for ``x[i]`` in ``frac[i]``/``exp[i]``.

    The first element returned by ``libdevice.frexp`` goes to ``frac`` and
    the second to ``exp``. Threads whose grid index falls outside ``x`` do
    nothing.
    """
    tid = cuda.grid(1)
    if tid >= len(x):
        return

    mantissa, exponent = libdevice.frexp(x[tid])
    frac[tid] = mantissa
    exp[tid] = exponent
25
+
26
+
27
def use_sad(r, x, y, z):
    """Kernel body: store ``libdevice.sad(x[i], y[i], z[i])`` in ``r[i]``.

    Threads whose grid index falls outside ``x`` do nothing.
    """
    tid = cuda.grid(1)
    if tid >= len(x):
        return

    r[tid] = libdevice.sad(x[tid], y[tid], z[tid])
32
+
33
+
34
@skip_on_cudasim('Libdevice functions are not supported on cudasim')
class TestLibdevice(CUDATestCase):
    """
    Value-checking tests for a handful of libdevice function wrappers.

    One representative function is exercised per kind of return value
    (scalar return, UniTuple return, Tuple return) so that the typing and
    lowering of the generated implementations is covered for each kind.
    """

    def test_sincos(self):
        # sincos exercises the return of a UniTuple from a libdevice function
        angles = np.arange(100, dtype=np.float64)
        sines = np.zeros_like(angles)
        cosines = np.zeros_like(angles)

        kernel = cuda.jit(use_sincos)
        kernel[4, 32](sines, cosines, angles)

        np.testing.assert_allclose(np.cos(angles), cosines)
        np.testing.assert_allclose(np.sin(angles), sines)

    def test_frexp(self):
        # frexp exercises the return of a (heterogeneous) Tuple from a
        # libdevice function
        values = np.linspace(start=1.0, stop=10.0, num=100, dtype=np.float64)
        fractions = np.zeros_like(values)
        exponents = np.zeros(shape=values.shape, dtype=np.int32)

        kernel = cuda.jit(use_frexp)
        kernel[4, 32](fractions, exponents, values)

        expected_fractions, expected_exponents = np.frexp(values)

        np.testing.assert_array_equal(expected_fractions, fractions)
        np.testing.assert_array_equal(expected_exponents, exponents)

    def test_sad(self):
        # sad exercises the return of a scalar from a libdevice function
        x = np.arange(0, 200, 2)
        y = np.arange(50, 150)
        z = np.arange(15, 115)
        result = np.zeros_like(x)

        kernel = cuda.jit(use_sad)
        kernel[4, 32](result, x, y, z)

        expected = np.abs(x - y) + z
        np.testing.assert_array_equal(expected, result)
81
+
82
+
83
# A template for generating tests of compiling calls to libdevice functions.
# It is filled in with %-formatting using the keys `pyargs`, `func`,
# `funcargs` and `retvars`.
#
# The purpose of the call and assignment of the return variables is to ensure
# the actual function implementations are not thrown away, resulting in a PTX
# implementation that only contains the ret instruction - this may hide
# certain errors.
function_template = """\
from numba.cuda import libdevice

def pyfunc(%(pyargs)s):
    ret = libdevice.%(func)s(%(funcargs)s)
    %(retvars)s = ret
"""
95
+
96
+
97
def make_test_call(libname):
    """
    Build a test method that compiles a call to one libdevice function.

    :param libname: Key into ``functions``; the name of the underlying
                    libdevice function including its ``__nv_`` prefix.
    :return: A function taking ``self`` that is suitable for attaching to a
             ``unittest.TestCase`` subclass as a test method.
    """

    def _test_call_functions(self):
        # Strip off '__nv_' from libdevice name to get Python name
        apiname = libname[5:]
        # Look the wrapper up eagerly so that a function missing from the
        # libdevice module is reported as an AttributeError on that name. The
        # looked-up object itself is not needed.
        getattr(libdevice, apiname)
        retty, args = functions[libname]
        sig = create_signature(retty, args)
        returns_tuple = isinstance(sig.return_type,
                                   (types.Tuple, types.UniTuple))

        # Construct arguments to the libdevice function. These are all
        # non-pointer arguments to the underlying bitcode function. The index
        # in each name is the position in the full argument list, so the
        # numbering may have gaps where pointer arguments were skipped.
        funcargs = ", ".join('a%d' % i for i, arg in enumerate(args)
                             if not arg.is_ptr)

        # Arguments to the Python function (`pyfunc` in the template above) are
        # the arguments to the libdevice function, plus as many extra arguments
        # as there are in the return type of the libdevice function - one for
        # scalar-valued returns, or the length of the tuple for tuple-valued
        # returns. Each extra argument is a 1D contiguous array of the
        # corresponding return type, into which element 0 is stored.
        if returns_tuple:
            retnames = ['r%d' % i for i in range(len(sig.return_type))]
            # Parameters for the return values come first, followed by the
            # parameters for the argument values
            pyargs = ", ".join(retnames) + ", " + funcargs
            # Unpack the tuple returned by the libdevice function into the
            # first element of each return array (`r0`, `r1`, etc.)
            retvars = ", ".join('%s[0]' % name for name in retnames)
            return_types = [ret[::1] for ret in sig.return_type]
        else:
            # Scalar return is a more straightforward case
            pyargs = "r0, " + funcargs
            retvars = "r0[0]"
            return_types = [sig.return_type[::1]]

        # Create the string containing the function to compile
        code = function_template % {
            'func': apiname,
            'pyargs': pyargs,
            'funcargs': funcargs,
            'retvars': retvars,
        }

        # Convert the string to a Python function
        namespace = {}
        exec(code, globals(), namespace)
        pyfunc = namespace['pyfunc']

        # Compute the signature for compilation. This mirrors the creation of
        # arguments to the Python function above: return arrays first, then
        # the non-pointer argument types.
        argtypes = tuple(return_types
                         + [arg.ty for arg in args if not arg.is_ptr])
        ptx, _ = compile_ptx(pyfunc, argtypes)

        # If the function body was discarded by optimization (therefore making
        # the test a bit weak), there won't be any loading of parameters -
        # ensure that a load from parameters occurs somewhere in the PTX
        self.assertIn('ld.param', ptx)

        # Returning the result (through a passed-in array) should also require
        # a store to global memory, so check for at least one of those too.
        self.assertIn('st.global', ptx)

    return _test_call_functions
169
+
170
+
171
@skip_on_cudasim('Compilation to PTX is not supported on cudasim')
class TestLibdeviceCompilation(unittest.TestCase):
    """
    Class for holding all tests of compiling calls to libdevice functions.

    The class body is intentionally empty: the test methods are generated
    and attached at module level, one per entry in ``functions``. We generate
    the actual tests in this class (as opposed to using subTest and one test
    within this class) because there are a lot of tests, and it makes the
    test suite appear frozen to test them all as subTests in one test.
    """
179
+
180
+
181
# Attach one generated compilation test per libdevice function, named after
# the full libdevice name (e.g. `test___nv_<name>`).
for libname in functions:
    generated_test = make_test_call(libname)
    setattr(TestLibdeviceCompilation, f'test_{libname}', generated_test)


# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,199 @@
1
+ from numba import cuda, float32, int32
2
+ from numba.core.errors import NumbaInvalidConfigWarning
3
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
4
+ from numba.tests.support import ignore_internal_warnings
5
+ import re
6
+ import unittest
7
+ import warnings
8
+
9
+
10
@skip_on_cudasim('Simulator does not produce lineinfo')
class TestCudaLineInfo(CUDATestCase):
    """
    Tests of the ``lineinfo`` option of ``cuda.jit``.

    The checks inspect the generated LLVM IR and PTX for line mapping
    (``.file`` / ``.loc`` directives, ``DebugDirectivesOnly`` compile units)
    and for the absence of full debug info.
    """

    def _loc_directive_regex(self):
        """Return a compiled regex matching a PTX ``.loc`` directive."""
        # This is used in several tests

        pat = (
            r'\.loc'      # .loc directive beginning
            r'\s+[0-9]+'  # whitespace then file index
            r'\s+[0-9]+'  # whitespace then line number
            r'\s+[0-9]+'  # whitespace then column position
        )
        return re.compile(pat)

    def _check(self, fn, sig, expect):
        """
        Compile ``fn`` for ``sig`` and check its LLVM IR and PTX for lineinfo.

        :param fn: A ``cuda.jit``-decorated function.
        :param sig: The signature to compile and inspect.
        :param expect: If true, line info must be present; otherwise it must
                       be absent. Full debug info must be absent either way.
        """
        fn.compile(sig)
        llvm = fn.inspect_llvm(sig)
        ptx = fn.inspect_asm(sig)
        assertfn = self.assertIsNotNone if expect else self.assertIsNone

        # DICompileUnit debug info metadata should all be of the
        # DebugDirectivesOnly kind, and not the FullDebug kind
        pat = (
            r'!DICompileUnit\(.*'   # Opening of DICompileUnit metadata. Since
                                    # the order of attributes is not
                                    # guaranteed, we need to match arbitrarily
                                    # afterwards.
            r'emissionKind:\s+'     # The emissionKind attribute followed by
                                    # whitespace.
            r'DebugDirectivesOnly'  # The correct emissionKind.
        )
        match = re.compile(pat).search(llvm)
        assertfn(match, msg=ptx)

        pat = (
            r'!DICompileUnit\(.*'   # Same as the pattern above, but for the
            r'emissionKind:\s+'     # incorrect FullDebug emissionKind.
            r'FullDebug'            #
        )
        match = re.compile(pat).search(llvm)
        self.assertIsNone(match, msg=ptx)

        # The name of this file should be present in the line mapping
        # if lineinfo was propagated through correctly.
        pat = (
            r'\.file'                # .file directive beginning
            r'\s+[0-9]+\s+'          # file number surrounded by whitespace
            r'".*test_lineinfo.py"'  # filename in quotes, ignoring full path
        )
        match = re.compile(pat).search(ptx)
        assertfn(match, msg=ptx)

        # .loc directives should be present in the ptx. The search result
        # must be bound to `match`; otherwise the assertion below would only
        # re-check the .file match from the previous step.
        match = self._loc_directive_regex().search(ptx)
        assertfn(match, msg=ptx)

        # Debug info sections should not be present when only lineinfo is
        # generated
        pat = (
            r'\.section\s+'  # .section directive beginning
            r'\.debug_info'  # Section named ".debug_info"
        )
        match = re.compile(pat).search(ptx)
        self.assertIsNone(match, msg=ptx)

    def test_no_lineinfo_in_asm(self):
        @cuda.jit(lineinfo=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(int32[:],), expect=False)

    def test_lineinfo_in_asm(self):
        @cuda.jit(lineinfo=True)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(int32[:],), expect=True)

    def test_lineinfo_maintains_error_model(self):
        sig = (float32[::1], float32[::1])

        @cuda.jit(sig, lineinfo=True)
        def divide_kernel(x, y):
            x[0] /= y[0]

        llvm = divide_kernel.inspect_llvm(sig)

        # When the error model is Python, the device function returns 1 to
        # signal an exception (e.g. divide by zero) has occurred. When the
        # error model is the default NumPy one (as it should be when only
        # lineinfo is enabled) the device function always returns 0.
        self.assertNotIn('ret i32 1', llvm)

    def test_no_lineinfo_in_device_function(self):
        # Ensure that no lineinfo is generated in device functions by default.
        @cuda.jit
        def callee(x):
            x[0] += 1

        @cuda.jit
        def caller(x):
            x[0] = 1
            callee(x)

        sig = (int32[:],)
        self._check(caller, sig=sig, expect=False)

    def test_lineinfo_in_device_function(self):
        # First we define a device function / kernel pair and run the usual
        # checks on the generated LLVM and PTX.

        @cuda.jit(lineinfo=True)
        def callee(x):
            x[0] += 1

        @cuda.jit(lineinfo=True)
        def caller(x):
            x[0] = 1
            callee(x)

        sig = (int32[:],)
        self._check(caller, sig=sig, expect=True)

        # Now we can check the PTX of the device function specifically.

        ptx = caller.inspect_asm(sig)
        ptxlines = ptx.splitlines()

        # Check that there is no device function in the PTX

        # A line beginning with ".weak .func" that identifies a device function
        devfn_start = re.compile(r'^\.weak\s+\.func')

        for line in ptxlines:
            if devfn_start.match(line) is not None:
                self.fail(f"Found device function in PTX:\n\n{ptx}")

        # Scan for .loc directives that refer to an inlined device function

        loc_directive = self._loc_directive_regex()
        found = any(
            loc_directive.search(line) is not None and 'inlined_at' in line
            for line in ptxlines
        )

        if not found:
            # Note the trailing space: the two literals are concatenated.
            self.fail('No .loc directive with inlined_at info found '
                      f'in:\n\n{ptx}')

        # We also inspect the LLVM to ensure that there's debug info for each
        # subprogram (function). A lightweight way to check this is to ensure
        # that we have as many DISubprograms as we expect.

        llvm = caller.inspect_llvm(sig)
        subprograms = sum('distinct !DISubprogram' in line
                          for line in llvm.splitlines())

        # One DISubprogram for each of:
        # - The kernel wrapper
        # - The caller
        # - The callee
        expected_subprograms = 3

        self.assertEqual(subprograms, expected_subprograms,
                         f'Expected {expected_subprograms} DISubprograms; '
                         f'got {subprograms}')

    def test_debug_and_lineinfo_warning(self):
        with warnings.catch_warnings(record=True) as w:
            ignore_internal_warnings()

            # We pass opt=False to prevent the warning about opt and debug
            # occurring as well
            @cuda.jit(debug=True, lineinfo=True, opt=False)
            def f():
                pass

        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].category, NumbaInvalidConfigWarning)
        self.assertIn('debug and lineinfo are mutually exclusive',
                      str(w[0].message))
196
+
197
+
198
# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,164 @@
1
+ import numpy as np
2
+
3
+ from numba import cuda, int32, complex128, void
4
+ from numba.core import types
5
+ from numba.core.errors import TypingError
6
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
7
+ from .extensions_usecases import test_struct_model_type, TestStruct
8
+
9
+
10
def culocal(A, B):
    """Copy 1000 elements from A to B via a ``cuda.local.array`` of int32."""
    scratch = cuda.local.array(1000, dtype=int32)
    # Stage every element in the local buffer first...
    for idx in range(scratch.shape[0]):
        scratch[idx] = A[idx]
    # ...then write the staged values out.
    for idx in range(scratch.shape[0]):
        B[idx] = scratch[idx]
16
+
17
+
18
def culocalcomplex(A, B):
    """Copy 100 elements from A to B via a ``cuda.local.array`` of complex128."""
    scratch = cuda.local.array(100, dtype=complex128)
    # Stage every element in the local buffer first...
    for idx in range(scratch.shape[0]):
        scratch[idx] = A[idx]
    # ...then write the staged values out.
    for idx in range(scratch.shape[0]):
        B[idx] = scratch[idx]
24
+
25
+
26
def culocal1tuple(A, B):
    """Copy 5 elements from A to B via a local array with a 1-tuple shape."""
    scratch = cuda.local.array((5,), dtype=int32)
    # Stage every element in the local buffer first...
    for idx in range(scratch.shape[0]):
        scratch[idx] = A[idx]
    # ...then write the staged values out.
    for idx in range(scratch.shape[0]):
        B[idx] = scratch[idx]
32
+
33
+
34
@skip_on_cudasim('PTX inspection not available in cudasim')
class TestCudaLocalMem(CUDATestCase):
    """Tests of creating and using arrays made with ``cuda.local.array``."""

    def test_local_array(self):
        sig = (int32[:], int32[:])
        jculocal = cuda.jit(sig)(culocal)
        self.assertIn('.local', jculocal.inspect_asm(sig))
        A = np.arange(1000, dtype='int32')
        B = np.zeros_like(A)
        jculocal[1, 1](A, B)
        np.testing.assert_array_equal(A, B)

    def test_local_array_1_tuple(self):
        """Ensure that local arrays can be constructed with 1-tuple shape
        """
        jculocal = cuda.jit('void(int32[:], int32[:])')(culocal1tuple)
        # Don't check if .local is in the ptx because the optimizer
        # may reduce it to registers.
        A = np.arange(5, dtype='int32')
        B = np.zeros_like(A)
        jculocal[1, 1](A, B)
        np.testing.assert_array_equal(A, B)

    def test_local_array_complex(self):
        sig = 'void(complex128[:], complex128[:])'
        jculocalcomplex = cuda.jit(sig)(culocalcomplex)
        A = (np.arange(100, dtype='complex128') - 1) / 2j
        B = np.zeros_like(A)
        jculocalcomplex[1, 1](A, B)
        np.testing.assert_array_equal(A, B)

    def check_dtype(self, f, dtype):
        """
        Assert that the local array in kernel ``f`` was typed with ``dtype``.

        The kernel must bind its local array to a variable named ``l``, as
        that name is looked up in the typemap of the first overload.
        """
        # Find the typing of the dtype argument to cuda.local.array
        annotation = next(iter(f.overloads.values()))._type_annotation
        l_dtype = annotation.typemap['l'].dtype
        # Ensure that the typing is correct
        self.assertEqual(l_dtype, dtype)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numba_dtype(self):
        # Check that Numba types can be used as the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            l = cuda.local.array(10, dtype=int32)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numpy_dtype(self):
        # Check that NumPy types can be used as the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            l = cuda.local.array(10, dtype=np.int32)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_string_dtype(self):
        # Check that strings can be used to specify the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            l = cuda.local.array(10, dtype='int32')
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_invalid_string_dtype(self):
        # Check that strings of invalid dtypes cause a typing error
        msg_regex = ".*Invalid NumPy dtype specified: 'int33'.*"
        with self.assertRaisesRegex(TypingError, msg_regex):
            @cuda.jit(void(int32[::1]))
            def f(x):
                l = cuda.local.array(10, dtype='int33')
                l[0] = x[0]
                x[0] = l[0]

    def test_type_with_struct_data_model(self):
        @cuda.jit(void(test_struct_model_type[::1]))
        def f(x):
            l = cuda.local.array(10, dtype=test_struct_model_type)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, test_struct_model_type)

    def test_struct_model_type_arr(self):
        @cuda.jit(void(int32[::1], int32[::1]))
        def f(outx, outy):
            # Test creation
            arr = cuda.local.array(10, dtype=test_struct_model_type)
            # Test set to arr
            for i in range(len(arr)):
                obj = TestStruct(int32(i), int32(i * 2))
                arr[i] = obj
            # Test get from arr
            for i in range(len(arr)):
                outx[i] = arr[i].x
                outy[i] = arr[i].y

        # The kernel writes 10 elements to each output, so the outputs must
        # hold 10 elements. (`np.array((10,))` would create a single-element
        # array containing the value 10.)
        arrx = np.zeros(10, dtype="int32")
        arry = np.zeros(10, dtype="int32")

        f[1, 1](arrx, arry)

        np.testing.assert_array_equal(arrx, np.arange(10))
        np.testing.assert_array_equal(arry, np.arange(10) * 2)

    def _check_local_array_size_fp16(self, shape, expected, ty):
        """Check that a local array of dtype ``ty`` reports size ``expected``."""
        @cuda.jit
        def s(a):
            arr = cuda.local.array(shape, dtype=ty)
            a[0] = arr.size

        result = np.zeros(1, dtype=np.float16)
        s[1, 1](result)
        self.assertEqual(result[0], expected)

    def test_issue_fp16_support(self):
        self._check_local_array_size_fp16(2, 2, types.float16)
        self._check_local_array_size_fp16(2, 2, np.float16)
161
+
162
+
163
# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,37 @@
1
+ from numba import float64, uint32
2
+ from numba.cuda.compiler import compile_ptx
3
+ from numba.cuda.testing import skip_on_cudasim, unittest
4
+
5
+
6
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCudaMandel(unittest.TestCase):
    """Compile-only smoke test using a Mandelbrot escape-time function."""

    def test_mandel(self):
        """Just make sure we can compile this
        """

        def mandel(tid, min_x, max_x, min_y, max_y, width, height, iters):
            step_x = (max_x - min_x) / width
            step_y = (max_y - min_y) / height

            col = tid % width
            # NOTE(review): this is true division, so `row` is fractional;
            # `//` may have been intended - confirm before changing.
            row = tid / width

            c = complex(min_x + col * step_x, min_y + row * step_y)
            z = 0.0j

            for i in range(iters):
                z = z * z + c
                if (z.real * z.real + z.imag * z.imag) >= 4:
                    return i
            return iters

        argtypes = (uint32, float64, float64, float64, float64,
                    uint32, uint32, uint32)
        compile_ptx(mandel, argtypes, device=True)
34
+
35
+
36
# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()