numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1057 @@
1
+ from collections import namedtuple
2
+ from textwrap import indent
3
+
4
+ from numba.types import float32, float64, int16, int32, int64, void, Tuple
5
+ from numba.core.typing.templates import signature
6
+
7
+ arg = namedtuple("arg", ("name", "ty", "is_ptr"))  # one parameter of a `functions` entry: its name, its type object, and a flag that is True for the pointer-style arguments (e.g. sptr/cptr)
8
+
9
+ functions = {
10
+ "__nv_abs": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
11
+ "__nv_acos": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
12
+ "__nv_acosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
13
+ "__nv_acosh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
14
+ "__nv_acoshf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
15
+ "__nv_asin": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
16
+ "__nv_asinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
17
+ "__nv_asinh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
18
+ "__nv_asinhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
19
+ "__nv_atan": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
20
+ "__nv_atan2": (
21
+ float64,
22
+ [
23
+ arg(name="x", ty=float64, is_ptr=False),
24
+ arg(name="y", ty=float64, is_ptr=False),
25
+ ],
26
+ ),
27
+ "__nv_atan2f": (
28
+ float32,
29
+ [
30
+ arg(name="x", ty=float32, is_ptr=False),
31
+ arg(name="y", ty=float32, is_ptr=False),
32
+ ],
33
+ ),
34
+ "__nv_atanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
35
+ "__nv_atanh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
36
+ "__nv_atanhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
37
+ "__nv_brev": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
38
+ "__nv_brevll": (int64, [arg(name="x", ty=int64, is_ptr=False)]),
39
+ "__nv_byte_perm": (
40
+ int32,
41
+ [
42
+ arg(name="x", ty=int32, is_ptr=False),
43
+ arg(name="y", ty=int32, is_ptr=False),
44
+ arg(name="z", ty=int32, is_ptr=False),
45
+ ],
46
+ ),
47
+ "__nv_cbrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
48
+ "__nv_cbrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
49
+ "__nv_ceil": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
50
+ "__nv_ceilf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
51
+ "__nv_clz": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
52
+ "__nv_clzll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
53
+ "__nv_copysign": (
54
+ float64,
55
+ [
56
+ arg(name="x", ty=float64, is_ptr=False),
57
+ arg(name="y", ty=float64, is_ptr=False),
58
+ ],
59
+ ),
60
+ "__nv_copysignf": (
61
+ float32,
62
+ [
63
+ arg(name="x", ty=float32, is_ptr=False),
64
+ arg(name="y", ty=float32, is_ptr=False),
65
+ ],
66
+ ),
67
+ "__nv_cos": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
68
+ "__nv_cosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
69
+ "__nv_cosh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
70
+ "__nv_coshf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
71
+ "__nv_cospi": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
72
+ "__nv_cospif": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
73
+ "__nv_dadd_rd": (
74
+ float64,
75
+ [
76
+ arg(name="x", ty=float64, is_ptr=False),
77
+ arg(name="y", ty=float64, is_ptr=False),
78
+ ],
79
+ ),
80
+ "__nv_dadd_rn": (
81
+ float64,
82
+ [
83
+ arg(name="x", ty=float64, is_ptr=False),
84
+ arg(name="y", ty=float64, is_ptr=False),
85
+ ],
86
+ ),
87
+ "__nv_dadd_ru": (
88
+ float64,
89
+ [
90
+ arg(name="x", ty=float64, is_ptr=False),
91
+ arg(name="y", ty=float64, is_ptr=False),
92
+ ],
93
+ ),
94
+ "__nv_dadd_rz": (
95
+ float64,
96
+ [
97
+ arg(name="x", ty=float64, is_ptr=False),
98
+ arg(name="y", ty=float64, is_ptr=False),
99
+ ],
100
+ ),
101
+ "__nv_ddiv_rd": (
102
+ float64,
103
+ [
104
+ arg(name="x", ty=float64, is_ptr=False),
105
+ arg(name="y", ty=float64, is_ptr=False),
106
+ ],
107
+ ),
108
+ "__nv_ddiv_rn": (
109
+ float64,
110
+ [
111
+ arg(name="x", ty=float64, is_ptr=False),
112
+ arg(name="y", ty=float64, is_ptr=False),
113
+ ],
114
+ ),
115
+ "__nv_ddiv_ru": (
116
+ float64,
117
+ [
118
+ arg(name="x", ty=float64, is_ptr=False),
119
+ arg(name="y", ty=float64, is_ptr=False),
120
+ ],
121
+ ),
122
+ "__nv_ddiv_rz": (
123
+ float64,
124
+ [
125
+ arg(name="x", ty=float64, is_ptr=False),
126
+ arg(name="y", ty=float64, is_ptr=False),
127
+ ],
128
+ ),
129
+ "__nv_dmul_rd": (
130
+ float64,
131
+ [
132
+ arg(name="x", ty=float64, is_ptr=False),
133
+ arg(name="y", ty=float64, is_ptr=False),
134
+ ],
135
+ ),
136
+ "__nv_dmul_rn": (
137
+ float64,
138
+ [
139
+ arg(name="x", ty=float64, is_ptr=False),
140
+ arg(name="y", ty=float64, is_ptr=False),
141
+ ],
142
+ ),
143
+ "__nv_dmul_ru": (
144
+ float64,
145
+ [
146
+ arg(name="x", ty=float64, is_ptr=False),
147
+ arg(name="y", ty=float64, is_ptr=False),
148
+ ],
149
+ ),
150
+ "__nv_dmul_rz": (
151
+ float64,
152
+ [
153
+ arg(name="x", ty=float64, is_ptr=False),
154
+ arg(name="y", ty=float64, is_ptr=False),
155
+ ],
156
+ ),
157
+ "__nv_double2float_rd": (
158
+ float32,
159
+ [arg(name="d", ty=float64, is_ptr=False)],
160
+ ),
161
+ "__nv_double2float_rn": (
162
+ float32,
163
+ [arg(name="d", ty=float64, is_ptr=False)],
164
+ ),
165
+ "__nv_double2float_ru": (
166
+ float32,
167
+ [arg(name="d", ty=float64, is_ptr=False)],
168
+ ),
169
+ "__nv_double2float_rz": (
170
+ float32,
171
+ [arg(name="d", ty=float64, is_ptr=False)],
172
+ ),
173
+ "__nv_double2hiint": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
174
+ "__nv_double2int_rd": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
175
+ "__nv_double2int_rn": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
176
+ "__nv_double2int_ru": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
177
+ "__nv_double2int_rz": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
178
+ "__nv_double2ll_rd": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
179
+ "__nv_double2ll_rn": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
180
+ "__nv_double2ll_ru": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
181
+ "__nv_double2ll_rz": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
182
+ "__nv_double2loint": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
183
+ "__nv_double2uint_rd": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
184
+ "__nv_double2uint_rn": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
185
+ "__nv_double2uint_ru": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
186
+ "__nv_double2uint_rz": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
187
+ "__nv_double2ull_rd": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
188
+ "__nv_double2ull_rn": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
189
+ "__nv_double2ull_ru": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
190
+ "__nv_double2ull_rz": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
191
+ "__nv_double_as_longlong": (
192
+ int64,
193
+ [arg(name="x", ty=float64, is_ptr=False)],
194
+ ),
195
+ "__nv_drcp_rd": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
196
+ "__nv_drcp_rn": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
197
+ "__nv_drcp_ru": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
198
+ "__nv_drcp_rz": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
199
+ "__nv_dsqrt_rd": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
200
+ "__nv_dsqrt_rn": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
201
+ "__nv_dsqrt_ru": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
202
+ "__nv_dsqrt_rz": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
203
+ "__nv_erf": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
204
+ "__nv_erfc": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
205
+ "__nv_erfcf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
206
+ "__nv_erfcinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
207
+ "__nv_erfcinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
208
+ "__nv_erfcx": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
209
+ "__nv_erfcxf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
210
+ "__nv_erff": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
211
+ "__nv_erfinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
212
+ "__nv_erfinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
213
+ "__nv_exp": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
214
+ "__nv_exp10": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
215
+ "__nv_exp10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
216
+ "__nv_exp2": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
217
+ "__nv_exp2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
218
+ "__nv_expf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
219
+ "__nv_expm1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
220
+ "__nv_expm1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
221
+ "__nv_fabs": (float64, [arg(name="f", ty=float64, is_ptr=False)]),
222
+ "__nv_fabsf": (float32, [arg(name="f", ty=float32, is_ptr=False)]),
223
+ "__nv_fadd_rd": (
224
+ float32,
225
+ [
226
+ arg(name="x", ty=float32, is_ptr=False),
227
+ arg(name="y", ty=float32, is_ptr=False),
228
+ ],
229
+ ),
230
+ "__nv_fadd_rn": (
231
+ float32,
232
+ [
233
+ arg(name="x", ty=float32, is_ptr=False),
234
+ arg(name="y", ty=float32, is_ptr=False),
235
+ ],
236
+ ),
237
+ "__nv_fadd_ru": (
238
+ float32,
239
+ [
240
+ arg(name="x", ty=float32, is_ptr=False),
241
+ arg(name="y", ty=float32, is_ptr=False),
242
+ ],
243
+ ),
244
+ "__nv_fadd_rz": (
245
+ float32,
246
+ [
247
+ arg(name="x", ty=float32, is_ptr=False),
248
+ arg(name="y", ty=float32, is_ptr=False),
249
+ ],
250
+ ),
251
+ "__nv_fast_cosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
252
+ "__nv_fast_exp10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
253
+ "__nv_fast_expf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
254
+ "__nv_fast_fdividef": (
255
+ float32,
256
+ [
257
+ arg(name="x", ty=float32, is_ptr=False),
258
+ arg(name="y", ty=float32, is_ptr=False),
259
+ ],
260
+ ),
261
+ "__nv_fast_log10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
262
+ "__nv_fast_log2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
263
+ "__nv_fast_logf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
264
+ "__nv_fast_powf": (
265
+ float32,
266
+ [
267
+ arg(name="x", ty=float32, is_ptr=False),
268
+ arg(name="y", ty=float32, is_ptr=False),
269
+ ],
270
+ ),
271
+ "__nv_fast_sincosf": (
272
+ void,
273
+ [
274
+ arg(name="x", ty=float32, is_ptr=False),
275
+ arg(name="sptr", ty=float32, is_ptr=True),
276
+ arg(name="cptr", ty=float32, is_ptr=True),
277
+ ],
278
+ ),
279
+ "__nv_fast_sinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
280
+ "__nv_fast_tanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
281
+ "__nv_fdim": (
282
+ float64,
283
+ [
284
+ arg(name="x", ty=float64, is_ptr=False),
285
+ arg(name="y", ty=float64, is_ptr=False),
286
+ ],
287
+ ),
288
+ "__nv_fdimf": (
289
+ float32,
290
+ [
291
+ arg(name="x", ty=float32, is_ptr=False),
292
+ arg(name="y", ty=float32, is_ptr=False),
293
+ ],
294
+ ),
295
+ "__nv_fdiv_rd": (
296
+ float32,
297
+ [
298
+ arg(name="x", ty=float32, is_ptr=False),
299
+ arg(name="y", ty=float32, is_ptr=False),
300
+ ],
301
+ ),
302
+ "__nv_fdiv_rn": (
303
+ float32,
304
+ [
305
+ arg(name="x", ty=float32, is_ptr=False),
306
+ arg(name="y", ty=float32, is_ptr=False),
307
+ ],
308
+ ),
309
+ "__nv_fdiv_ru": (
310
+ float32,
311
+ [
312
+ arg(name="x", ty=float32, is_ptr=False),
313
+ arg(name="y", ty=float32, is_ptr=False),
314
+ ],
315
+ ),
316
+ "__nv_fdiv_rz": (
317
+ float32,
318
+ [
319
+ arg(name="x", ty=float32, is_ptr=False),
320
+ arg(name="y", ty=float32, is_ptr=False),
321
+ ],
322
+ ),
323
+ "__nv_ffs": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
324
+ "__nv_ffsll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
325
+ "__nv_finitef": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
326
+ "__nv_float2half_rn": (int16, [arg(name="f", ty=float32, is_ptr=False)]),
327
+ "__nv_float2int_rd": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
328
+ "__nv_float2int_rn": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
329
+ "__nv_float2int_ru": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
330
+ "__nv_float2int_rz": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
331
+ "__nv_float2ll_rd": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
332
+ "__nv_float2ll_rn": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
333
+ "__nv_float2ll_ru": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
334
+ "__nv_float2ll_rz": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
335
+ "__nv_float2uint_rd": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
336
+ "__nv_float2uint_rn": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
337
+ "__nv_float2uint_ru": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
338
+ "__nv_float2uint_rz": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
339
+ "__nv_float2ull_rd": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
340
+ "__nv_float2ull_rn": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
341
+ "__nv_float2ull_ru": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
342
+ "__nv_float2ull_rz": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
343
+ "__nv_float_as_int": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
344
+ "__nv_floor": (float64, [arg(name="f", ty=float64, is_ptr=False)]),
345
+ "__nv_floorf": (float32, [arg(name="f", ty=float32, is_ptr=False)]),
346
+ "__nv_fma": (
347
+ float64,
348
+ [
349
+ arg(name="x", ty=float64, is_ptr=False),
350
+ arg(name="y", ty=float64, is_ptr=False),
351
+ arg(name="z", ty=float64, is_ptr=False),
352
+ ],
353
+ ),
354
+ "__nv_fma_rd": (
355
+ float64,
356
+ [
357
+ arg(name="x", ty=float64, is_ptr=False),
358
+ arg(name="y", ty=float64, is_ptr=False),
359
+ arg(name="z", ty=float64, is_ptr=False),
360
+ ],
361
+ ),
362
+ "__nv_fma_rn": (
363
+ float64,
364
+ [
365
+ arg(name="x", ty=float64, is_ptr=False),
366
+ arg(name="y", ty=float64, is_ptr=False),
367
+ arg(name="z", ty=float64, is_ptr=False),
368
+ ],
369
+ ),
370
+ "__nv_fma_ru": (
371
+ float64,
372
+ [
373
+ arg(name="x", ty=float64, is_ptr=False),
374
+ arg(name="y", ty=float64, is_ptr=False),
375
+ arg(name="z", ty=float64, is_ptr=False),
376
+ ],
377
+ ),
378
+ "__nv_fma_rz": (
379
+ float64,
380
+ [
381
+ arg(name="x", ty=float64, is_ptr=False),
382
+ arg(name="y", ty=float64, is_ptr=False),
383
+ arg(name="z", ty=float64, is_ptr=False),
384
+ ],
385
+ ),
386
+ "__nv_fmaf": (
387
+ float32,
388
+ [
389
+ arg(name="x", ty=float32, is_ptr=False),
390
+ arg(name="y", ty=float32, is_ptr=False),
391
+ arg(name="z", ty=float32, is_ptr=False),
392
+ ],
393
+ ),
394
+ "__nv_fmaf_rd": (
395
+ float32,
396
+ [
397
+ arg(name="x", ty=float32, is_ptr=False),
398
+ arg(name="y", ty=float32, is_ptr=False),
399
+ arg(name="z", ty=float32, is_ptr=False),
400
+ ],
401
+ ),
402
+ "__nv_fmaf_rn": (
403
+ float32,
404
+ [
405
+ arg(name="x", ty=float32, is_ptr=False),
406
+ arg(name="y", ty=float32, is_ptr=False),
407
+ arg(name="z", ty=float32, is_ptr=False),
408
+ ],
409
+ ),
410
+ "__nv_fmaf_ru": (
411
+ float32,
412
+ [
413
+ arg(name="x", ty=float32, is_ptr=False),
414
+ arg(name="y", ty=float32, is_ptr=False),
415
+ arg(name="z", ty=float32, is_ptr=False),
416
+ ],
417
+ ),
418
+ "__nv_fmaf_rz": (
419
+ float32,
420
+ [
421
+ arg(name="x", ty=float32, is_ptr=False),
422
+ arg(name="y", ty=float32, is_ptr=False),
423
+ arg(name="z", ty=float32, is_ptr=False),
424
+ ],
425
+ ),
426
+ "__nv_fmax": (
427
+ float64,
428
+ [
429
+ arg(name="x", ty=float64, is_ptr=False),
430
+ arg(name="y", ty=float64, is_ptr=False),
431
+ ],
432
+ ),
433
+ "__nv_fmaxf": (
434
+ float32,
435
+ [
436
+ arg(name="x", ty=float32, is_ptr=False),
437
+ arg(name="y", ty=float32, is_ptr=False),
438
+ ],
439
+ ),
440
+ "__nv_fmin": (
441
+ float64,
442
+ [
443
+ arg(name="x", ty=float64, is_ptr=False),
444
+ arg(name="y", ty=float64, is_ptr=False),
445
+ ],
446
+ ),
447
+ "__nv_fminf": (
448
+ float32,
449
+ [
450
+ arg(name="x", ty=float32, is_ptr=False),
451
+ arg(name="y", ty=float32, is_ptr=False),
452
+ ],
453
+ ),
454
+ "__nv_fmod": (
455
+ float64,
456
+ [
457
+ arg(name="x", ty=float64, is_ptr=False),
458
+ arg(name="y", ty=float64, is_ptr=False),
459
+ ],
460
+ ),
461
+ "__nv_fmodf": (
462
+ float32,
463
+ [
464
+ arg(name="x", ty=float32, is_ptr=False),
465
+ arg(name="y", ty=float32, is_ptr=False),
466
+ ],
467
+ ),
468
+ "__nv_fmul_rd": (
469
+ float32,
470
+ [
471
+ arg(name="x", ty=float32, is_ptr=False),
472
+ arg(name="y", ty=float32, is_ptr=False),
473
+ ],
474
+ ),
475
+ "__nv_fmul_rn": (
476
+ float32,
477
+ [
478
+ arg(name="x", ty=float32, is_ptr=False),
479
+ arg(name="y", ty=float32, is_ptr=False),
480
+ ],
481
+ ),
482
+ "__nv_fmul_ru": (
483
+ float32,
484
+ [
485
+ arg(name="x", ty=float32, is_ptr=False),
486
+ arg(name="y", ty=float32, is_ptr=False),
487
+ ],
488
+ ),
489
+ "__nv_fmul_rz": (
490
+ float32,
491
+ [
492
+ arg(name="x", ty=float32, is_ptr=False),
493
+ arg(name="y", ty=float32, is_ptr=False),
494
+ ],
495
+ ),
496
+ "__nv_frcp_rd": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
497
+ "__nv_frcp_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
498
+ "__nv_frcp_ru": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
499
+ "__nv_frcp_rz": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
500
+ "__nv_frexp": (
501
+ float64,
502
+ [
503
+ arg(name="x", ty=float64, is_ptr=False),
504
+ arg(name="b", ty=int32, is_ptr=True),
505
+ ],
506
+ ),
507
+ "__nv_frexpf": (
508
+ float32,
509
+ [
510
+ arg(name="x", ty=float32, is_ptr=False),
511
+ arg(name="b", ty=int32, is_ptr=True),
512
+ ],
513
+ ),
514
+ "__nv_frsqrt_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
515
+ "__nv_fsqrt_rd": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
516
+ "__nv_fsqrt_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
517
+ "__nv_fsqrt_ru": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
518
+ "__nv_fsqrt_rz": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
519
+ "__nv_fsub_rd": (
520
+ float32,
521
+ [
522
+ arg(name="x", ty=float32, is_ptr=False),
523
+ arg(name="y", ty=float32, is_ptr=False),
524
+ ],
525
+ ),
526
+ "__nv_fsub_rn": (
527
+ float32,
528
+ [
529
+ arg(name="x", ty=float32, is_ptr=False),
530
+ arg(name="y", ty=float32, is_ptr=False),
531
+ ],
532
+ ),
533
+ "__nv_fsub_ru": (
534
+ float32,
535
+ [
536
+ arg(name="x", ty=float32, is_ptr=False),
537
+ arg(name="y", ty=float32, is_ptr=False),
538
+ ],
539
+ ),
540
+ "__nv_fsub_rz": (
541
+ float32,
542
+ [
543
+ arg(name="x", ty=float32, is_ptr=False),
544
+ arg(name="y", ty=float32, is_ptr=False),
545
+ ],
546
+ ),
547
+ "__nv_hadd": (
548
+ int32,
549
+ [
550
+ arg(name="x", ty=int32, is_ptr=False),
551
+ arg(name="y", ty=int32, is_ptr=False),
552
+ ],
553
+ ),
554
+ "__nv_half2float": (float32, [arg(name="h", ty=int16, is_ptr=False)]),
555
+ "__nv_hiloint2double": (
556
+ float64,
557
+ [
558
+ arg(name="x", ty=int32, is_ptr=False),
559
+ arg(name="y", ty=int32, is_ptr=False),
560
+ ],
561
+ ),
562
+ "__nv_hypot": (
563
+ float64,
564
+ [
565
+ arg(name="x", ty=float64, is_ptr=False),
566
+ arg(name="y", ty=float64, is_ptr=False),
567
+ ],
568
+ ),
569
+ "__nv_hypotf": (
570
+ float32,
571
+ [
572
+ arg(name="x", ty=float32, is_ptr=False),
573
+ arg(name="y", ty=float32, is_ptr=False),
574
+ ],
575
+ ),
576
+ "__nv_ilogb": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
577
+ "__nv_ilogbf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
578
+ "__nv_int2double_rn": (float64, [arg(name="i", ty=int32, is_ptr=False)]),
579
+ "__nv_int2float_rd": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
580
+ "__nv_int2float_rn": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
581
+ "__nv_int2float_ru": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
582
+ "__nv_int2float_rz": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
583
+ "__nv_int_as_float": (float32, [arg(name="x", ty=int32, is_ptr=False)]),
584
+ "__nv_isfinited": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
585
+ "__nv_isinfd": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
586
+ "__nv_isinff": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
587
+ "__nv_isnand": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
588
+ "__nv_isnanf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
589
+ "__nv_j0": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
590
+ "__nv_j0f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
591
+ "__nv_j1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
592
+ "__nv_j1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
593
+ "__nv_jn": (
594
+ float64,
595
+ [
596
+ arg(name="n", ty=int32, is_ptr=False),
597
+ arg(name="x", ty=float64, is_ptr=False),
598
+ ],
599
+ ),
600
+ "__nv_jnf": (
601
+ float32,
602
+ [
603
+ arg(name="n", ty=int32, is_ptr=False),
604
+ arg(name="x", ty=float32, is_ptr=False),
605
+ ],
606
+ ),
607
+ "__nv_ldexp": (
608
+ float64,
609
+ [
610
+ arg(name="x", ty=float64, is_ptr=False),
611
+ arg(name="y", ty=int32, is_ptr=False),
612
+ ],
613
+ ),
614
+ "__nv_ldexpf": (
615
+ float32,
616
+ [
617
+ arg(name="x", ty=float32, is_ptr=False),
618
+ arg(name="y", ty=int32, is_ptr=False),
619
+ ],
620
+ ),
621
+ "__nv_lgamma": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
622
+ "__nv_lgammaf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
623
+ "__nv_ll2double_rd": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
624
+ "__nv_ll2double_rn": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
625
+ "__nv_ll2double_ru": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
626
+ "__nv_ll2double_rz": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
627
+ "__nv_ll2float_rd": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
628
+ "__nv_ll2float_rn": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
629
+ "__nv_ll2float_ru": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
630
+ "__nv_ll2float_rz": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
631
+ "__nv_llabs": (int64, [arg(name="x", ty=int64, is_ptr=False)]),
632
+ "__nv_llmax": (
633
+ int64,
634
+ [
635
+ arg(name="x", ty=int64, is_ptr=False),
636
+ arg(name="y", ty=int64, is_ptr=False),
637
+ ],
638
+ ),
639
+ "__nv_llmin": (
640
+ int64,
641
+ [
642
+ arg(name="x", ty=int64, is_ptr=False),
643
+ arg(name="y", ty=int64, is_ptr=False),
644
+ ],
645
+ ),
646
+ "__nv_llrint": (int64, [arg(name="x", ty=float64, is_ptr=False)]),
647
+ "__nv_llrintf": (int64, [arg(name="x", ty=float32, is_ptr=False)]),
648
+ "__nv_llround": (int64, [arg(name="x", ty=float64, is_ptr=False)]),
649
+ "__nv_llroundf": (int64, [arg(name="x", ty=float32, is_ptr=False)]),
650
+ "__nv_log": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
651
+ "__nv_log10": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
652
+ "__nv_log10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
653
+ "__nv_log1p": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
654
+ "__nv_log1pf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
655
+ "__nv_log2": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
656
+ "__nv_log2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
657
+ "__nv_logb": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
658
+ "__nv_logbf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
659
+ "__nv_logf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
660
+ "__nv_longlong_as_double": (
661
+ float64,
662
+ [arg(name="x", ty=int64, is_ptr=False)],
663
+ ),
664
+ "__nv_max": (
665
+ int32,
666
+ [
667
+ arg(name="x", ty=int32, is_ptr=False),
668
+ arg(name="y", ty=int32, is_ptr=False),
669
+ ],
670
+ ),
671
+ "__nv_min": (
672
+ int32,
673
+ [
674
+ arg(name="x", ty=int32, is_ptr=False),
675
+ arg(name="y", ty=int32, is_ptr=False),
676
+ ],
677
+ ),
678
+ "__nv_modf": (
679
+ float64,
680
+ [
681
+ arg(name="x", ty=float64, is_ptr=False),
682
+ arg(name="b", ty=float64, is_ptr=True),
683
+ ],
684
+ ),
685
+ "__nv_modff": (
686
+ float32,
687
+ [
688
+ arg(name="x", ty=float32, is_ptr=False),
689
+ arg(name="b", ty=float32, is_ptr=True),
690
+ ],
691
+ ),
692
+ "__nv_mul24": (
693
+ int32,
694
+ [
695
+ arg(name="x", ty=int32, is_ptr=False),
696
+ arg(name="y", ty=int32, is_ptr=False),
697
+ ],
698
+ ),
699
+ "__nv_mul64hi": (
700
+ int64,
701
+ [
702
+ arg(name="x", ty=int64, is_ptr=False),
703
+ arg(name="y", ty=int64, is_ptr=False),
704
+ ],
705
+ ),
706
+ "__nv_mulhi": (
707
+ int32,
708
+ [
709
+ arg(name="x", ty=int32, is_ptr=False),
710
+ arg(name="y", ty=int32, is_ptr=False),
711
+ ],
712
+ ),
713
+ # __nv_nan and __nv_nanf are excluded - they return a representation of a
714
+ # quiet NaN, but the argument they take seems to be undocumented, and
715
+ # follows a strange form - it is not an output like every other pointer
716
+ # argument. If a NaN is required, one can be obtained in CUDA Python by
717
+ # other means, e.g. `math.nan`. They are left in this list for completeness
718
+ # / reference.
719
+ # "__nv_nan": (float64, [arg(name="tagp", ty=int8, is_ptr=True)]),
720
+ # "__nv_nanf": (float32, [arg(name="tagp", ty=int8, is_ptr=True)]),
721
+ "__nv_nearbyint": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
722
+ "__nv_nearbyintf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
723
+ "__nv_nextafter": (
724
+ float64,
725
+ [
726
+ arg(name="x", ty=float64, is_ptr=False),
727
+ arg(name="y", ty=float64, is_ptr=False),
728
+ ],
729
+ ),
730
+ "__nv_nextafterf": (
731
+ float32,
732
+ [
733
+ arg(name="x", ty=float32, is_ptr=False),
734
+ arg(name="y", ty=float32, is_ptr=False),
735
+ ],
736
+ ),
737
+ "__nv_normcdf": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
738
+ "__nv_normcdff": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
739
+ "__nv_normcdfinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
740
+ "__nv_normcdfinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
741
+ "__nv_popc": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
742
+ "__nv_popcll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
743
+ "__nv_pow": (
744
+ float64,
745
+ [
746
+ arg(name="x", ty=float64, is_ptr=False),
747
+ arg(name="y", ty=float64, is_ptr=False),
748
+ ],
749
+ ),
750
+ "__nv_powf": (
751
+ float32,
752
+ [
753
+ arg(name="x", ty=float32, is_ptr=False),
754
+ arg(name="y", ty=float32, is_ptr=False),
755
+ ],
756
+ ),
757
+ "__nv_powi": (
758
+ float64,
759
+ [
760
+ arg(name="x", ty=float64, is_ptr=False),
761
+ arg(name="y", ty=int32, is_ptr=False),
762
+ ],
763
+ ),
764
+ "__nv_powif": (
765
+ float32,
766
+ [
767
+ arg(name="x", ty=float32, is_ptr=False),
768
+ arg(name="y", ty=int32, is_ptr=False),
769
+ ],
770
+ ),
771
+ "__nv_rcbrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
772
+ "__nv_rcbrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
773
+ "__nv_remainder": (
774
+ float64,
775
+ [
776
+ arg(name="x", ty=float64, is_ptr=False),
777
+ arg(name="y", ty=float64, is_ptr=False),
778
+ ],
779
+ ),
780
+ "__nv_remainderf": (
781
+ float32,
782
+ [
783
+ arg(name="x", ty=float32, is_ptr=False),
784
+ arg(name="y", ty=float32, is_ptr=False),
785
+ ],
786
+ ),
787
+ "__nv_remquo": (
788
+ float64,
789
+ [
790
+ arg(name="x", ty=float64, is_ptr=False),
791
+ arg(name="y", ty=float64, is_ptr=False),
792
+ arg(name="c", ty=int32, is_ptr=True),
793
+ ],
794
+ ),
795
+ "__nv_remquof": (
796
+ float32,
797
+ [
798
+ arg(name="x", ty=float32, is_ptr=False),
799
+ arg(name="y", ty=float32, is_ptr=False),
800
+ arg(name="quo", ty=int32, is_ptr=True),
801
+ ],
802
+ ),
803
+ "__nv_rhadd": (
804
+ int32,
805
+ [
806
+ arg(name="x", ty=int32, is_ptr=False),
807
+ arg(name="y", ty=int32, is_ptr=False),
808
+ ],
809
+ ),
810
+ "__nv_rint": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
811
+ "__nv_rintf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
812
+ "__nv_round": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
813
+ "__nv_roundf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
814
+ "__nv_rsqrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
815
+ "__nv_rsqrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
816
+ "__nv_sad": (
817
+ int32,
818
+ [
819
+ arg(name="x", ty=int32, is_ptr=False),
820
+ arg(name="y", ty=int32, is_ptr=False),
821
+ arg(name="z", ty=int32, is_ptr=False),
822
+ ],
823
+ ),
824
+ "__nv_saturatef": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
825
+ "__nv_scalbn": (
826
+ float64,
827
+ [
828
+ arg(name="x", ty=float64, is_ptr=False),
829
+ arg(name="y", ty=int32, is_ptr=False),
830
+ ],
831
+ ),
832
+ "__nv_scalbnf": (
833
+ float32,
834
+ [
835
+ arg(name="x", ty=float32, is_ptr=False),
836
+ arg(name="y", ty=int32, is_ptr=False),
837
+ ],
838
+ ),
839
+ "__nv_signbitd": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
840
+ "__nv_signbitf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
841
+ "__nv_sin": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
842
+ "__nv_sincos": (
843
+ void,
844
+ [
845
+ arg(name="x", ty=float64, is_ptr=False),
846
+ arg(name="sptr", ty=float64, is_ptr=True),
847
+ arg(name="cptr", ty=float64, is_ptr=True),
848
+ ],
849
+ ),
850
+ "__nv_sincosf": (
851
+ void,
852
+ [
853
+ arg(name="x", ty=float32, is_ptr=False),
854
+ arg(name="sptr", ty=float32, is_ptr=True),
855
+ arg(name="cptr", ty=float32, is_ptr=True),
856
+ ],
857
+ ),
858
+ "__nv_sincospi": (
859
+ void,
860
+ [
861
+ arg(name="x", ty=float64, is_ptr=False),
862
+ arg(name="sptr", ty=float64, is_ptr=True),
863
+ arg(name="cptr", ty=float64, is_ptr=True),
864
+ ],
865
+ ),
866
+ "__nv_sincospif": (
867
+ void,
868
+ [
869
+ arg(name="x", ty=float32, is_ptr=False),
870
+ arg(name="sptr", ty=float32, is_ptr=True),
871
+ arg(name="cptr", ty=float32, is_ptr=True),
872
+ ],
873
+ ),
874
+ "__nv_sinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
875
+ "__nv_sinh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
876
+ "__nv_sinhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
877
+ "__nv_sinpi": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
878
+ "__nv_sinpif": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
879
+ "__nv_sqrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
880
+ "__nv_sqrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
881
+ "__nv_tan": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
882
+ "__nv_tanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
883
+ "__nv_tanh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
884
+ "__nv_tanhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
885
+ "__nv_tgamma": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
886
+ "__nv_tgammaf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
887
+ "__nv_trunc": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
888
+ "__nv_truncf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
889
+ "__nv_uhadd": (
890
+ int32,
891
+ [
892
+ arg(name="x", ty=int32, is_ptr=False),
893
+ arg(name="y", ty=int32, is_ptr=False),
894
+ ],
895
+ ),
896
+ "__nv_uint2double_rn": (float64, [arg(name="i", ty=int32, is_ptr=False)]),
897
+ "__nv_uint2float_rd": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
898
+ "__nv_uint2float_rn": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
899
+ "__nv_uint2float_ru": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
900
+ "__nv_uint2float_rz": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
901
+ "__nv_ull2double_rd": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
902
+ "__nv_ull2double_rn": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
903
+ "__nv_ull2double_ru": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
904
+ "__nv_ull2double_rz": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
905
+ "__nv_ull2float_rd": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
906
+ "__nv_ull2float_rn": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
907
+ "__nv_ull2float_ru": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
908
+ "__nv_ull2float_rz": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
909
+ "__nv_ullmax": (
910
+ int64,
911
+ [
912
+ arg(name="x", ty=int64, is_ptr=False),
913
+ arg(name="y", ty=int64, is_ptr=False),
914
+ ],
915
+ ),
916
+ "__nv_ullmin": (
917
+ int64,
918
+ [
919
+ arg(name="x", ty=int64, is_ptr=False),
920
+ arg(name="y", ty=int64, is_ptr=False),
921
+ ],
922
+ ),
923
+ "__nv_umax": (
924
+ int32,
925
+ [
926
+ arg(name="x", ty=int32, is_ptr=False),
927
+ arg(name="y", ty=int32, is_ptr=False),
928
+ ],
929
+ ),
930
+ "__nv_umin": (
931
+ int32,
932
+ [
933
+ arg(name="x", ty=int32, is_ptr=False),
934
+ arg(name="y", ty=int32, is_ptr=False),
935
+ ],
936
+ ),
937
+ "__nv_umul24": (
938
+ int32,
939
+ [
940
+ arg(name="x", ty=int32, is_ptr=False),
941
+ arg(name="y", ty=int32, is_ptr=False),
942
+ ],
943
+ ),
944
+ "__nv_umul64hi": (
945
+ int64,
946
+ [
947
+ arg(name="x", ty=int64, is_ptr=False),
948
+ arg(name="y", ty=int64, is_ptr=False),
949
+ ],
950
+ ),
951
+ "__nv_umulhi": (
952
+ int32,
953
+ [
954
+ arg(name="x", ty=int32, is_ptr=False),
955
+ arg(name="y", ty=int32, is_ptr=False),
956
+ ],
957
+ ),
958
+ "__nv_urhadd": (
959
+ int32,
960
+ [
961
+ arg(name="x", ty=int32, is_ptr=False),
962
+ arg(name="y", ty=int32, is_ptr=False),
963
+ ],
964
+ ),
965
+ "__nv_usad": (
966
+ int32,
967
+ [
968
+ arg(name="x", ty=int32, is_ptr=False),
969
+ arg(name="y", ty=int32, is_ptr=False),
970
+ arg(name="z", ty=int32, is_ptr=False),
971
+ ],
972
+ ),
973
+ "__nv_y0": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
974
+ "__nv_y0f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
975
+ "__nv_y1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
976
+ "__nv_y1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
977
+ "__nv_yn": (
978
+ float64,
979
+ [
980
+ arg(name="n", ty=int32, is_ptr=False),
981
+ arg(name="x", ty=float64, is_ptr=False),
982
+ ],
983
+ ),
984
+ "__nv_ynf": (
985
+ float32,
986
+ [
987
+ arg(name="n", ty=int32, is_ptr=False),
988
+ arg(name="x", ty=float32, is_ptr=False),
989
+ ],
990
+ ),
991
+ }
992
+
993
+
994
def create_signature(retty, args):
    """
    Given the return type and arguments for a libdevice function, return the
    signature of the stub function used to call it from CUDA Python.

    :param retty: Return type of the libdevice function; ``void`` when the
                  function returns nothing directly.
    :param args: Sequence of argument descriptions, each exposing ``ty`` (the
                 argument type) and ``is_ptr`` (whether it is passed by
                 pointer).
    :return: The result of ``signature(...)`` called with the stub's return
             type followed by the types of the non-pointer arguments.
    """
    # Any pointer arguments should be part of the return type rather than
    # parameters of the stub.
    outputs = [a.ty for a in args if a.is_ptr]
    inputs = [a.ty for a in args if not a.is_ptr]

    # If the return type is void, there is no point adding it to the list of
    # return types.
    if retty != void:
        outputs.insert(0, retty)

    if not outputs:
        # A void function without pointer arguments yields nothing at all;
        # keep void instead of indexing into an empty list.
        stub_retty = void
    elif len(outputs) == 1:
        stub_retty = outputs[0]
    else:
        stub_retty = Tuple(outputs)

    return signature(stub_retty, *inputs)
1015
+
1016
+
1017
+ # The following code generates the stubs for libdevice functions.
1018
+ #
1019
+ # Stubs can be regenerated (e.g. if the functions dict above is modified) with:
1020
+ #
1021
+ # python -c "from numba.cuda.libdevicefuncs import generate_stubs; \
1022
+ # generate_stubs()" > numba/cuda/libdevice.py
1023
+
1024
# Skeleton of the docstring emitted for each generated stub: a link to the
# libdevice documentation page for `func`, followed by the parameter
# descriptions and the return type.
docstring_template = (
    "\n"
    "See https://docs.nvidia.com/cuda/libdevice-users-guide/{func}.html\n"
    "\n"
    "{param_types}\n"
    ":rtype: {retty}\n"
)

# Description of a single stub parameter; `a` is formatted through its
# `name` and `ty` attributes.
param_template = ":param {a.name}: Argument.\n:type {a.name}: {a.ty}"
1034
+
1035
+
1036
def generate_stubs():
    """
    Print the Python source of a stub function for every entry in
    ``functions``.

    Each stub is named after the libdevice function with its leading
    ``__nv_`` removed, takes the non-pointer arguments as parameters, and
    carries a docstring built from ``docstring_template`` and
    ``param_template``. Nothing is returned; the generated source is written
    with ``print``.
    """
    # Imported here because it is only needed to present a renamed argument
    # to `param_template`.
    from types import SimpleNamespace

    def argname(arg):
        # Some libdevice functions have arguments called `in`, which causes a
        # syntax error in Python, so we rename these to `x`.
        return "x" if arg.name == "in" else arg.name

    for name, (retty, args) in functions.items():
        # Pointer arguments are part of the stub's return type, so only the
        # remaining arguments appear as parameters.
        inputs = [a for a in args if not a.is_ptr]
        argstr = ", ".join(argname(a) for a in inputs)
        stub_sig = create_signature(retty, args)

        # Document each parameter under the same identifier used in the
        # `def` line, so a renamed `in` argument is described as `x`.
        param_types = "\n".join(
            param_template.format(
                a=SimpleNamespace(
                    name=argname(a), ty=a.ty, is_ptr=a.is_ptr
                )
            )
            for a in inputs
        )
        docstring = docstring_template.format(
            param_types=param_types, retty=stub_sig.return_type, func=name
        )
        docstring = indent(docstring, " ")
        # name[5:] drops the five-character "__nv_" prefix.
        print(f'def {name[5:]}({argstr}):\n """{docstring}"""\n\n')