numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,257 @@
1
+ import numpy as np
2
+
3
+ from numba.cuda import compile_ptx
4
+ from numba.core.types import f2, i1, i2, i4, i8, u1, u2, u4, u8
5
+ from numba import cuda
6
+ from numba.core import types
7
+ from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
8
+ skip_unless_cc_53)
9
+ from numba.types import float16, float32
10
+ import itertools
11
+ import unittest
12
+
13
+
14
def native_cast(x):
    """Convert ``x`` using the builtin ``float`` constructor."""
    return float(x)
16
+
17
+
18
def to_int8(x):
    """Convert ``x`` to ``np.int8``."""
    return np.int8(x)
20
+
21
+
22
def to_int16(x):
    """Convert ``x`` to ``np.int16``."""
    return np.int16(x)
24
+
25
+
26
def to_int32(x):
    """Convert ``x`` to ``np.int32``."""
    return np.int32(x)
28
+
29
+
30
def to_int64(x):
    """Convert ``x`` to ``np.int64``."""
    return np.int64(x)
32
+
33
+
34
def to_uint8(x):
    """Convert ``x`` to ``np.uint8``."""
    return np.uint8(x)
36
+
37
+
38
def to_uint16(x):
    """Convert ``x`` to ``np.uint16``."""
    return np.uint16(x)
40
+
41
+
42
def to_uint32(x):
    """Convert ``x`` by calling the Numba ``types.uint32`` type object.

    Unlike the narrower unsigned helpers, this one does not go through the
    NumPy scalar constructor.
    """
    return types.uint32(x)
44
+
45
+
46
def to_uint64(x):
    """Convert ``x`` by calling the Numba ``types.uint64`` type object.

    Unlike the narrower unsigned helpers, this one does not go through the
    NumPy scalar constructor.
    """
    return types.uint64(x)
48
+
49
+
50
def to_float16(x):
    """Convert ``x`` to ``np.float16`` and halve it by multiplying by 0.5."""
    # When division and operators on float16 types are supported, this should
    # be changed to match the implementation in to_float32.
    return (np.float16(x) * np.float16(0.5))
54
+
55
+
56
def to_float32(x):
    """Convert ``x`` to ``np.float32`` and divide it by two."""
    return np.float32(x) / np.float32(2)
58
+
59
+
60
def to_float64(x):
    """Convert ``x`` to ``np.float64`` and divide it by two."""
    return np.float64(x) / np.float64(2)
62
+
63
+
64
def to_complex64(x):
    """Convert ``x`` to ``np.complex64``."""
    return np.complex64(x)
66
+
67
+
68
def to_complex128(x):
    """Convert ``x`` to ``np.complex128``."""
    return np.complex128(x)
70
+
71
+
72
+ # Since multiplication of float16 is not supported via the operator * on
73
+ # float16s yet, and the host does not implement cuda.fp16.*, we need two
74
+ # versions of the following functions:
75
+ #
76
+ # - The device version uses cuda.fp16.hmul
77
+ # - The host version uses the * operator
78
+
79
def cuda_int_literal_to_float16(x):
    """Device version: multiply ``np.float16(x)`` by the int literal ``2``
    using ``cuda.fp16.hmul``."""
    # Note that we need to use `2` and not `np.float16(2)` to ensure that this
    # types as a literal int and not a const float16.
    return cuda.fp16.hmul(np.float16(x), 2)
83
+
84
+
85
def reference_int_literal_to_float16(x):
    """Host reference for ``cuda_int_literal_to_float16``.

    Computes the product of ``x`` and 2 with both operands as ``np.float16``,
    using the ``*`` operator.
    """
    multiplicand = np.float16(x)
    multiplier = np.float16(2)
    return multiplicand * multiplier
87
+
88
+
89
def cuda_float_literal_to_float16(x):
    """Device version: multiply ``np.float16(x)`` by the float constant
    ``2.5`` using ``cuda.fp16.hmul``."""
    # Note that `2.5` types as a const float64 and not a literal float, but
    # this case is provided in case that changes in future.
    return cuda.fp16.hmul(np.float16(x), 2.5)
93
+
94
+
95
def reference_float_literal_to_float16(x):
    """Host reference for ``cuda_float_literal_to_float16``.

    Computes the product of ``x`` and 2.5 with both operands as
    ``np.float16``, using the ``*`` operator.
    """
    multiplicand = np.float16(x)
    multiplier = np.float16(2.5)
    return multiplicand * multiplier
97
+
98
+
99
class TestCasting(CUDATestCase):
    """Tests of scalar type conversions in CUDA device code.

    Conversions are checked either by running a small kernel and comparing
    with a host-side result, or by compiling to PTX and looking for the
    expected conversion instruction.
    """

    def _create_wrapped(self, pyfunc, intype, outtype):
        """Return a host callable that evaluates ``pyfunc`` on the device.

        ``pyfunc`` is compiled as a device function and invoked from a kernel
        launched with a single thread. The returned callable writes its
        argument into a one-element array of dtype ``intype``, launches the
        kernel, and returns the single element of the ``outtype`` result
        array.
        """
        wrapped_func = cuda.jit(device=True)(pyfunc)

        @cuda.jit
        def cuda_wrapper_fn(arg, res):
            res[0] = wrapped_func(arg[0])

        def wrapper_fn(arg):
            argarray = np.zeros(1, dtype=intype)
            argarray[0] = arg
            resarray = np.zeros(1, dtype=outtype)
            cuda_wrapper_fn[1, 1](argarray, resarray)
            return resarray[0]

        return wrapper_fn

    @skip_unless_cc_53
    def test_float_to_int(self):
        # Each signed conversion is checked from every float width, for a
        # positive and a negative input, against both the host call of the
        # same function and Python's int() truncation.
        pyfuncs = (to_int8, to_int16, to_int32, to_int64)
        totys = (np.int8, np.int16, np.int32, np.int64)
        fromtys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            for fromty in fromtys:
                with self.subTest(fromty=fromty, toty=toty):
                    cfunc = self._create_wrapped(pyfunc, fromty, toty)
                    self.assertEqual(cfunc(12.3), pyfunc(12.3))
                    self.assertEqual(cfunc(12.3), int(12.3))
                    self.assertEqual(cfunc(-12.3), pyfunc(-12.3))
                    self.assertEqual(cfunc(-12.3), int(-12.3))

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_float16_to_int_ptx(self):
        pyfuncs = (to_int8, to_int16, to_int32, to_int64)
        sizes = (8, 16, 32, 64)

        for pyfunc, size in zip(pyfuncs, sizes):
            ptx, _ = compile_ptx(pyfunc, (f2,), device=True)
            self.assertIn(f"cvt.rni.s{size}.f16", ptx)

    @skip_unless_cc_53
    def test_float_to_uint(self):
        # Use the unsigned conversion helpers so that the float -> uint casts
        # themselves are exercised on the device; pairing the signed helpers
        # with unsigned result arrays would only test the store.
        pyfuncs = (to_uint8, to_uint16, to_uint32, to_uint64)
        totys = (np.uint8, np.uint16, np.uint32, np.uint64)
        fromtys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            for fromty in fromtys:
                with self.subTest(fromty=fromty, toty=toty):
                    cfunc = self._create_wrapped(pyfunc, fromty, toty)
                    self.assertEqual(cfunc(12.3), pyfunc(12.3))
                    self.assertEqual(cfunc(12.3), int(12.3))

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_float16_to_uint_ptx(self):
        pyfuncs = (to_uint8, to_uint16, to_uint32, to_uint64)
        sizes = (8, 16, 32, 64)

        for pyfunc, size in zip(pyfuncs, sizes):
            ptx, _ = compile_ptx(pyfunc, (f2,), device=True)
            self.assertIn(f"cvt.rni.u{size}.f16", ptx)

    @skip_unless_cc_53
    def test_int_to_float(self):
        pyfuncs = (to_float16, to_float32, to_float64)
        totys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            with self.subTest(toty=toty):
                cfunc = self._create_wrapped(pyfunc, np.int64, toty)
                self.assertEqual(cfunc(321), pyfunc(321))

    @skip_unless_cc_53
    def test_literal_to_float16(self):
        # The device functions use cuda.fp16.hmul, which the host cannot
        # execute, so each is compared against its host-side reference.
        cudafuncs = (cuda_int_literal_to_float16,
                     cuda_float_literal_to_float16)
        hostfuncs = (reference_int_literal_to_float16,
                     reference_float_literal_to_float16)

        for cudafunc, hostfunc in zip(cudafuncs, hostfuncs):
            with self.subTest(func=cudafunc):
                cfunc = self._create_wrapped(cudafunc, np.float16, np.float16)
                self.assertEqual(cfunc(321), hostfunc(321))

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_int_to_float16_ptx(self):
        fromtys = (i1, i2, i4, i8)
        sizes = (8, 16, 32, 64)

        for ty, size in zip(fromtys, sizes):
            ptx, _ = compile_ptx(to_float16, (ty,), device=True)
            self.assertIn(f"cvt.rn.f16.s{size}", ptx)

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_uint_to_float16_ptx(self):
        fromtys = (u1, u2, u4, u8)
        sizes = (8, 16, 32, 64)

        for ty, size in zip(fromtys, sizes):
            ptx, _ = compile_ptx(to_float16, (ty,), device=True)
            self.assertIn(f"cvt.rn.f16.u{size}", ptx)

    @skip_unless_cc_53
    def test_float_to_float(self):
        pyfuncs = (to_float16, to_float32, to_float64)
        tys = (np.float16, np.float32, np.float64)

        for (pyfunc, fromty), toty in itertools.product(zip(pyfuncs, tys), tys):
            with self.subTest(fromty=fromty, toty=toty):
                cfunc = self._create_wrapped(pyfunc, fromty, toty)
                # For this test we cannot use the pyfunc for comparison because
                # the CUDA target doesn't yet implement division (or operators)
                # for float16 values, so we test by comparing with the computed
                # expression instead.
                np.testing.assert_allclose(cfunc(12.3),
                                           toty(12.3) / toty(2), rtol=0.0003)
                np.testing.assert_allclose(cfunc(-12.3),
                                           toty(-12.3) / toty(2), rtol=0.0003)

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_float16_to_float_ptx(self):
        pyfuncs = (to_float32, to_float64)
        postfixes = ("f32", "f64")

        for pyfunc, postfix in zip(pyfuncs, postfixes):
            ptx, _ = compile_ptx(pyfunc, (f2,), device=True)
            self.assertIn(f"cvt.{postfix}.f16", ptx)

    @skip_unless_cc_53
    def test_float_to_complex(self):
        pyfuncs = (to_complex64, to_complex128)
        totys = (np.complex64, np.complex128)
        fromtys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            for fromty in fromtys:
                with self.subTest(fromty=fromty, toty=toty):
                    cfunc = self._create_wrapped(pyfunc, fromty, toty)
                    # Here we need to explicitly cast the input to the pyfunc
                    # to match the casting that is automatically applied when
                    # passing the input to the cfunc as part of wrapping it in
                    # an array of type fromtype.
                    np.testing.assert_allclose(cfunc(3.21),
                                               pyfunc(fromty(3.21)))
                    np.testing.assert_allclose(cfunc(-3.21),
                                               pyfunc(fromty(-3.21)) + 0j)

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_native_cast(self):
        # Only the store instruction in the generated PTX is checked here.
        float32_ptx, _ = cuda.compile_ptx(native_cast, (float32,), device=True)
        self.assertIn("st.f32", float32_ptx)

        float16_ptx, _ = cuda.compile_ptx(native_cast, (float16,), device=True)
        self.assertIn("st.u16", float16_ptx)
254
+
255
+
256
+ if __name__ == '__main__':
257
+ unittest.main()
@@ -0,0 +1,33 @@
1
+ import numpy as np
2
+
3
+ from numba import cuda, types
4
+ from numba.cuda.testing import (skip_on_cudasim, test_data_dir, unittest,
5
+ CUDATestCase)
6
+ from numba.tests.support import skip_unless_cffi
7
+
8
+
9
@skip_unless_cffi
@skip_on_cudasim('Simulator does not support linking')
class TestCFFI(CUDATestCase):
    # Checks that a pointer obtained with cffi's ffi.from_buffer inside a
    # kernel can be passed to an externally linked device function.
    def test_from_buffer(self):
        # cffi is imported locally; the class is skipped when it is
        # unavailable (see skip_unless_cffi).
        import cffi
        ffi = cffi.FFI()

        # The foreign function is provided by jitlink.ptx in the test data
        # directory and is declared as taking a pointer to int32 and
        # returning nothing.
        link = str(test_data_dir / 'jitlink.ptx')
        sig = types.void(types.CPointer(types.int32))
        array_mutator = cuda.declare_device('array_mutator', sig)

        @cuda.jit(link=[link])
        def mutate_array(x):
            x_ptr = ffi.from_buffer(x)
            array_mutator(x_ptr)

        # Two-element int32 input [0, 1], processed by a single thread.
        x = np.arange(2).astype(np.int32)
        mutate_array[1, 1](x)

        # The foreign function should have copied element 1 to element 0
        self.assertEqual(x[0], x[1])
30
+
31
+
32
+ if __name__ == '__main__':
33
+ unittest.main()
@@ -0,0 +1,276 @@
1
+ from math import sqrt
2
+ from numba import cuda, float32, int16, int32, int64, uint32, void
3
+ from numba.cuda import (compile, compile_for_current_device, compile_ptx,
4
+ compile_ptx_for_current_device)
5
+ from numba.cuda.cudadrv import runtime
6
+ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
7
+
8
+
9
+ # A test function at the module scope to ensure we get the name right for the C
10
+ # ABI whether a function is at module or local scope.
11
def f_module(x, y):
    """Return ``x + y``; a module-scope function for the C ABI naming tests."""
    return x + y
13
+
14
+
15
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCompile(unittest.TestCase):
    """Tests of ``compile`` / ``compile_ptx`` that inspect the generated
    output rather than executing it."""

    def test_global_kernel(self):
        def f(r, x, y):
            i = cuda.grid(1)
            if i < len(r):
                r[i] = x[i] + y[i]

        args = (float32[:], float32[:], float32[:])
        ptx, resty = compile_ptx(f, args)

        # Kernels should not have a func_retval parameter
        self.assertNotIn('func_retval', ptx)
        # .visible .func is used to denote a device function
        self.assertNotIn('.visible .func', ptx)
        # .visible .entry would denote the presence of a global function
        self.assertIn('.visible .entry', ptx)
        # Return type for kernels should always be void
        self.assertEqual(resty, void)

    def test_device_function(self):
        def add(x, y):
            return x + y

        args = (float32, float32)
        ptx, resty = compile_ptx(add, args, device=True)

        # Device functions take a func_retval parameter for storing the
        # returned value in by reference
        self.assertIn('func_retval', ptx)
        # .visible .func is used to denote a device function
        self.assertIn('.visible .func', ptx)
        # .visible .entry would denote the presence of a global function
        self.assertNotIn('.visible .entry', ptx)
        # Inferred return type as expected?
        self.assertEqual(resty, float32)

        # Check that function's output matches signature
        sig_int32 = int32(int32, int32)
        ptx, resty = compile_ptx(add, sig_int32, device=True)
        self.assertEqual(resty, int32)

        sig_int16 = int16(int16, int16)
        ptx, resty = compile_ptx(add, sig_int16, device=True)
        self.assertEqual(resty, int16)
        # Using string as signature
        sig_string = "uint32(uint32, uint32)"
        ptx, resty = compile_ptx(add, sig_string, device=True)
        self.assertEqual(resty, uint32)

    def test_fastmath(self):
        def f(x, y, z, d):
            return sqrt((x * y + z) / d)

        args = (float32, float32, float32, float32)
        ptx, resty = compile_ptx(f, args, device=True)

        # Without fastmath, fma contraction is enabled by default, but ftz and
        # approximate div / sqrt is not.
        self.assertIn('fma.rn.f32', ptx)
        self.assertIn('div.rn.f32', ptx)
        self.assertIn('sqrt.rn.f32', ptx)

        ptx, resty = compile_ptx(f, args, device=True, fastmath=True)

        # With fastmath, ftz and approximate div / sqrt are enabled
        self.assertIn('fma.rn.ftz.f32', ptx)
        self.assertIn('div.approx.ftz.f32', ptx)
        self.assertIn('sqrt.approx.ftz.f32', ptx)

    def check_debug_info(self, ptx):
        """Assert that ``ptx`` has a debug_info section and a ``.file``
        directive naming this module."""
        # A debug_info section should exist in the PTX. Whitespace varies
        # between CUDA toolkit versions.
        self.assertRegex(ptx, r'\.section\s+\.debug_info')
        # A .file directive should be produced and include the name of the
        # source. The path and whitespace may vary, so we accept anything
        # ending in the filename of this module. The dot before "py" is
        # escaped so that it only matches a literal dot.
        self.assertRegex(ptx, r'\.file.*test_compiler\.py"')

    def test_device_function_with_debug(self):
        # See Issue #6719 - this ensures that compilation with debug succeeds
        # with CUDA 11.2 / NVVM 7.0 onwards. Previously it failed because NVVM
        # IR version metadata was not added when compiling device functions,
        # and NVVM assumed DBG version 1.0 if not specified, which is
        # incompatible with the 3.0 IR we use. This was specified only for
        # kernels.
        def f():
            pass

        ptx, resty = compile_ptx(f, (), device=True, debug=True)
        self.check_debug_info(ptx)

    def test_kernel_with_debug(self):
        # Inspired by (but not originally affected by) Issue #6719
        def f():
            pass

        ptx, resty = compile_ptx(f, (), debug=True)
        self.check_debug_info(ptx)

    def check_line_info(self, ptx):
        """Assert that ``ptx`` has a ``.file`` directive naming this module."""
        # A .file directive should be produced and include the name of the
        # source. The path and whitespace may vary, so we accept anything
        # ending in the filename of this module. The dot before "py" is
        # escaped so that it only matches a literal dot.
        self.assertRegex(ptx, r'\.file.*test_compiler\.py"')

    def test_device_function_with_line_info(self):
        def f():
            pass

        ptx, resty = compile_ptx(f, (), device=True, lineinfo=True)
        self.check_line_info(ptx)

    def test_kernel_with_line_info(self):
        def f():
            pass

        ptx, resty = compile_ptx(f, (), lineinfo=True)
        self.check_line_info(ptx)

    def test_non_void_return_type(self):
        def f(x, y):
            return x[0] + y[0]

        with self.assertRaisesRegex(TypeError, 'must have void return type'):
            compile_ptx(f, (uint32[::1], uint32[::1]))

    def test_c_abi_disallowed_for_kernel(self):
        def f(x, y):
            return x + y

        with self.assertRaisesRegex(NotImplementedError,
                                    "The C ABI is not supported for kernels"):
            compile_ptx(f, (int32, int32), abi="c")

    def test_unsupported_abi(self):
        def f(x, y):
            return x + y

        with self.assertRaisesRegex(NotImplementedError,
                                    "Unsupported ABI: fastcall"):
            compile_ptx(f, (int32, int32), abi="fastcall")

    def test_c_abi_device_function(self):
        def f(x, y):
            return x + y

        ptx, resty = compile_ptx(f, int32(int32, int32), device=True, abi="c")
        # There should be no more than two parameters. assertNotIn takes the
        # member first and the container second, so the substring being
        # searched for must be the first argument.
        self.assertNotIn("param_2", ptx)

        # The function name should match the Python function name (not the
        # qualname, which includes additional info), and its return value
        # should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+f\(")

        # If we compile for 64-bit integers, the return type should be 64 bits
        # wide
        ptx, resty = compile_ptx(f, int64(int64, int64), device=True, abi="c")
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b64")

    def test_c_abi_device_function_module_scope(self):
        ptx, resty = compile_ptx(f_module, int32(int32, int32), device=True,
                                 abi="c")

        # The function name should match the Python function name, and its
        # return value should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+f_module\(")

    def test_c_abi_with_abi_name(self):
        abi_info = {'abi_name': '_Z4funcii'}
        ptx, resty = compile_ptx(f_module, int32(int32, int32), device=True,
                                 abi="c", abi_info=abi_info)

        # The function name should match the one given in the ABI info, and its
        # return value should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+_Z4funcii\(")

    def test_compile_defaults_to_c_abi(self):
        ptx, resty = compile(f_module, int32(int32, int32), device=True)

        # The function name should match the Python function name, and its
        # return value should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+f_module\(")

    def test_compile_to_ltoir(self):
        if runtime.get_version() < (11, 5):
            self.skipTest("-gen-lto unavailable in this toolkit version")

        ltoir, resty = compile(f_module, int32(int32, int32), device=True,
                               output="ltoir")

        # There are no tools to interpret the LTOIR output, but we can check
        # that we appear to have obtained an LTOIR file. This magic number is
        # not documented, but is expected to remain consistent.
        LTOIR_MAGIC = 0x7F4E43ED
        header = int.from_bytes(ltoir[:4], byteorder='little')
        self.assertEqual(header, LTOIR_MAGIC)
        self.assertEqual(resty, int32)

    def test_compile_to_invalid_error(self):
        illegal_output = "illegal"
        msg = f"Unsupported output type: {illegal_output}"
        with self.assertRaisesRegex(NotImplementedError, msg):
            compile(f_module, int32(int32, int32), device=True,
                    output=illegal_output)
225
+
226
+
227
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCompileForCurrentDevice(CUDATestCase):
    """Checks that the ``*_for_current_device`` compile entry points emit PTX
    targeting the architecture selected for the current device."""

    def _check_ptx_for_current_device(self, compile_function):
        """Compile a trivial device function with ``compile_function`` and
        assert on the ``.target`` directive of the resulting PTX."""
        def add(x, y):
            return x + y

        ptx, _ = compile_function(add, (float32, float32), device=True)

        # Check we target the current device's compute capability, or the
        # closest compute capability supported by the current toolkit.
        closest = cuda.cudadrv.nvvm.find_closest_arch(
            cuda.get_current_device().compute_capability
        )
        self.assertIn(f'.target sm_{closest[0]}{closest[1]}', ptx)

    def test_compile_ptx_for_current_device(self):
        self._check_ptx_for_current_device(compile_ptx_for_current_device)

    def test_compile_for_current_device(self):
        self._check_ptx_for_current_device(compile_for_current_device)
248
+
249
+
250
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCompileOnlyTests(unittest.TestCase):
    '''For tests where we can only check correctness by examining the compiler
    output rather than observing the effects of execution.'''

    def test_nanosleep(self):
        def use_nanosleep(x):
            # Sleep for a constant time
            cuda.nanosleep(32)
            # Sleep for a variable time
            cuda.nanosleep(x)

        ptx, _ = compile_ptx(use_nanosleep, (uint32,), cc=(7, 0))

        # One instruction is expected per nanosleep call in the kernel; count
        # the PTX lines that mention the instruction.
        expected = 2
        nanosleep_count = sum('nanosleep.u32' in line
                              for line in ptx.split('\n'))

        self.assertEqual(expected, nanosleep_count,
                         (f'Got {nanosleep_count} nanosleep instructions, '
                          f'expected {expected}'))
273
+
274
+
275
+ if __name__ == '__main__':
276
+ unittest.main()