numba-cuda 0.18.1__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (88)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +2 -2
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +1 -1
  5. numba_cuda/numba/cuda/api.py +2 -7
  6. numba_cuda/numba/cuda/compiler.py +7 -4
  7. numba_cuda/numba/cuda/core/interpreter.py +3592 -0
  8. numba_cuda/numba/cuda/core/ir_utils.py +2645 -0
  9. numba_cuda/numba/cuda/core/sigutils.py +55 -0
  10. numba_cuda/numba/cuda/cuda_paths.py +9 -17
  11. numba_cuda/numba/cuda/cudadecl.py +1 -1
  12. numba_cuda/numba/cuda/cudadrv/driver.py +4 -19
  13. numba_cuda/numba/cuda/cudadrv/libs.py +1 -2
  14. numba_cuda/numba/cuda/cudadrv/nvrtc.py +44 -44
  15. numba_cuda/numba/cuda/cudadrv/nvvm.py +3 -18
  16. numba_cuda/numba/cuda/cudadrv/runtime.py +12 -1
  17. numba_cuda/numba/cuda/cudamath.py +1 -1
  18. numba_cuda/numba/cuda/decorators.py +4 -3
  19. numba_cuda/numba/cuda/deviceufunc.py +2 -1
  20. numba_cuda/numba/cuda/dispatcher.py +3 -2
  21. numba_cuda/numba/cuda/extending.py +1 -1
  22. numba_cuda/numba/cuda/itanium_mangler.py +211 -0
  23. numba_cuda/numba/cuda/libdevicedecl.py +1 -1
  24. numba_cuda/numba/cuda/libdevicefuncs.py +1 -1
  25. numba_cuda/numba/cuda/lowering.py +1 -1
  26. numba_cuda/numba/cuda/simulator/api.py +1 -1
  27. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -7
  28. numba_cuda/numba/cuda/target.py +1 -2
  29. numba_cuda/numba/cuda/testing.py +4 -6
  30. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +80 -0
  31. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +1 -1
  32. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  33. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +1 -1
  34. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  35. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +1 -1
  36. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +1 -1
  37. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +1 -1
  38. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +4 -6
  39. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -4
  40. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  41. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +1 -3
  42. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +1 -3
  43. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +146 -3
  44. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +1 -1
  45. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -4
  46. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -1
  47. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +1 -1
  48. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  49. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +1 -284
  50. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +473 -0
  51. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +1 -1
  52. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -6
  54. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +1 -1
  55. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +1 -1
  56. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +295 -0
  57. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +1 -1
  58. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  59. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +1 -1
  60. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +5 -1
  61. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +1 -1
  62. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +1 -1
  63. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +1 -1
  64. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +1 -1
  65. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +1 -1
  66. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +1 -1
  67. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +1 -1
  68. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +1 -1
  69. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +1 -1
  70. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +1 -1
  71. numba_cuda/numba/cuda/tests/nocuda/test_import.py +1 -1
  72. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -2
  73. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +1 -1
  74. numba_cuda/numba/cuda/tests/support.py +752 -0
  75. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -3
  76. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +4 -1
  77. numba_cuda/numba/cuda/typing/__init__.py +8 -0
  78. numba_cuda/numba/cuda/typing/templates.py +1453 -0
  79. numba_cuda/numba/cuda/vector_types.py +3 -3
  80. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/METADATA +21 -28
  81. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/RECORD +84 -79
  82. numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
  83. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
  84. numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
  85. numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
  86. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/WHEEL +0 -0
  87. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/licenses/LICENSE +0 -0
  88. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/top_level.txt +0 -0
@@ -1,3794 +0,0 @@
1
- /*
2
- * Copyright 1993-2021 NVIDIA Corporation. All rights reserved.
3
- *
4
- * NOTICE TO LICENSEE:
5
- *
6
- * This source code and/or documentation ("Licensed Deliverables") are
7
- * subject to NVIDIA intellectual property rights under U.S. and
8
- * international Copyright laws.
9
- *
10
- * These Licensed Deliverables contained herein is PROPRIETARY and
11
- * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
- * conditions of a form of NVIDIA software license agreement by and
13
- * between NVIDIA and Licensee ("License Agreement") or electronically
14
- * accepted by Licensee. Notwithstanding any terms or conditions to
15
- * the contrary in the License Agreement, reproduction or disclosure
16
- * of the Licensed Deliverables to any third party without the express
17
- * written consent of NVIDIA is prohibited.
18
- *
19
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
- * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
- * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
- * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
- * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
- * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
- * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
- * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
- * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
- * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
- * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
- * OF THESE LICENSED DELIVERABLES.
33
- *
34
- * U.S. Government End Users. These Licensed Deliverables are a
35
- * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
- * 1995), consisting of "commercial computer software" and "commercial
37
- * computer software documentation" as such terms are used in 48
38
- * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
- * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
- * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
- * U.S. Government End Users acquire the Licensed Deliverables with
42
- * only those rights set forth herein.
43
- *
44
- * Any use of the Licensed Deliverables in individual and commercial
45
- * software must include, in the user documentation and internal
46
- * comments to the code, the above Disclaimer and U.S. Government End
47
- * Users Notice.
48
- */
49
-
50
- /**
51
- * \defgroup CUDA_MATH_INTRINSIC_HALF Half Precision Intrinsics
52
- * This section describes half precision intrinsic functions that are
53
- * only supported in device code.
54
- * To use these functions, include the header file \p cuda_fp16.h in your program.
55
- */
56
-
57
- /**
58
- * \defgroup CUDA_MATH__HALF_ARITHMETIC Half Arithmetic Functions
59
- * \ingroup CUDA_MATH_INTRINSIC_HALF
60
- * To use these functions, include the header file \p cuda_fp16.h in your program.
61
- */
62
-
63
- /**
64
- * \defgroup CUDA_MATH__HALF2_ARITHMETIC Half2 Arithmetic Functions
65
- * \ingroup CUDA_MATH_INTRINSIC_HALF
66
- * To use these functions, include the header file \p cuda_fp16.h in your program.
67
- */
68
-
69
- /**
70
- * \defgroup CUDA_MATH__HALF_COMPARISON Half Comparison Functions
71
- * \ingroup CUDA_MATH_INTRINSIC_HALF
72
- * To use these functions, include the header file \p cuda_fp16.h in your program.
73
- */
74
-
75
- /**
76
- * \defgroup CUDA_MATH__HALF2_COMPARISON Half2 Comparison Functions
77
- * \ingroup CUDA_MATH_INTRINSIC_HALF
78
- * To use these functions, include the header file \p cuda_fp16.h in your program.
79
- */
80
-
81
- /**
82
- * \defgroup CUDA_MATH__HALF_MISC Half Precision Conversion and Data Movement
83
- * \ingroup CUDA_MATH_INTRINSIC_HALF
84
- * To use these functions, include the header file \p cuda_fp16.h in your program.
85
- */
86
-
87
- /**
88
- * \defgroup CUDA_MATH__HALF_FUNCTIONS Half Math Functions
89
- * \ingroup CUDA_MATH_INTRINSIC_HALF
90
- * To use these functions, include the header file \p cuda_fp16.h in your program.
91
- */
92
-
93
- /**
94
- * \defgroup CUDA_MATH__HALF2_FUNCTIONS Half2 Math Functions
95
- * \ingroup CUDA_MATH_INTRINSIC_HALF
96
- * To use these functions, include the header file \p cuda_fp16.h in your program.
97
- */
98
-
99
- #ifndef __CUDA_FP16_H__
100
- #define __CUDA_FP16_H__
101
-
102
- #define ___CUDA_FP16_STRINGIFY_INNERMOST(x) #x
103
- #define __CUDA_FP16_STRINGIFY(x) ___CUDA_FP16_STRINGIFY_INNERMOST(x)
104
-
105
- #if defined(__cplusplus)
106
- #if defined(__CUDACC__)
107
- #define __CUDA_FP16_DECL__ static __device__ __inline__
108
- #define __CUDA_HOSTDEVICE_FP16_DECL__ static __host__ __device__ __inline__
109
- #else
110
- #define __CUDA_HOSTDEVICE_FP16_DECL__ static
111
- #endif /* defined(__CUDACC__) */
112
-
113
- #define __CUDA_FP16_TYPES_EXIST__
114
-
115
- /* Forward-declaration of structures defined in "cuda_fp16.hpp" */
116
-
117
- /**
118
- * \brief half datatype
119
- *
120
- * \details This structure implements the datatype for storing
121
- * half-precision floating-point numbers. The structure implements
122
- * assignment operators and type conversions.
123
- * 16 bits are being used in total: 1 sign bit, 5 bits for the exponent,
124
- * and the significand is being stored in 10 bits.
125
- * The total precision is 11 bits. There are 15361 representable
126
- * numbers within the interval [0.0, 1.0], endpoints included.
127
- * On average we have log10(2**11) ~ 3.311 decimal digits.
128
- *
129
- * \internal
130
- * \req IEEE 754-2008 compliant implementation of half-precision
131
- * floating-point numbers.
132
- * \endinternal
133
- */
134
- struct __half;
135
-
136
- /**
137
- * \brief half2 datatype
138
- *
139
- * \details This structure implements the datatype for storing two
140
- * half-precision floating-point numbers.
141
- * The structure implements assignment operators and type conversions.
142
- *
143
- * \internal
144
- * \req Vectorized version of half.
145
- * \endinternal
146
- */
147
- struct __half2;
148
-
149
- /**
150
- * \ingroup CUDA_MATH__HALF_MISC
151
- * \brief Converts double number to half precision in round-to-nearest-even mode
152
- * and returns \p half with converted value.
153
- *
154
- * \details Converts double number \p a to half precision in round-to-nearest-even mode.
155
- * \param[in] a - double. Is only being read.
156
- * \returns half
157
- * - \p a converted to half.
158
- * \internal
159
- * \exception-guarantee no-throw guarantee
160
- * \behavior reentrant, thread safe
161
- * \endinternal
162
- */
163
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __double2half(const double a);
164
- /**
165
- * \ingroup CUDA_MATH__HALF_MISC
166
- * \brief Converts float number to half precision in round-to-nearest-even mode
167
- * and returns \p half with converted value.
168
- *
169
- * \details Converts float number \p a to half precision in round-to-nearest-even mode.
170
- * \param[in] a - float. Is only being read.
171
- * \returns half
172
- * - \p a converted to half.
173
- * \internal
174
- * \exception-guarantee no-throw guarantee
175
- * \behavior reentrant, thread safe
176
- * \endinternal
177
- */
178
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half(const float a);
179
- /**
180
- * \ingroup CUDA_MATH__HALF_MISC
181
- * \brief Converts float number to half precision in round-to-nearest-even mode
182
- * and returns \p half with converted value.
183
- *
184
- * \details Converts float number \p a to half precision in round-to-nearest-even mode.
185
- * \param[in] a - float. Is only being read.
186
- * \returns half
187
- * - \p a converted to half.
188
- * \internal
189
- * \exception-guarantee no-throw guarantee
190
- * \behavior reentrant, thread safe
191
- * \endinternal
192
- */
193
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rn(const float a);
194
- /**
195
- * \ingroup CUDA_MATH__HALF_MISC
196
- * \brief Converts float number to half precision in round-towards-zero mode
197
- * and returns \p half with converted value.
198
- *
199
- * \details Converts float number \p a to half precision in round-towards-zero mode.
200
- * \param[in] a - float. Is only being read.
201
- * \returns half
202
- * - \p a converted to half.
203
- * \internal
204
- * \exception-guarantee no-throw guarantee
205
- * \behavior reentrant, thread safe
206
- * \endinternal
207
- */
208
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rz(const float a);
209
- /**
210
- * \ingroup CUDA_MATH__HALF_MISC
211
- * \brief Converts float number to half precision in round-down mode
212
- * and returns \p half with converted value.
213
- *
214
- * \details Converts float number \p a to half precision in round-down mode.
215
- * \param[in] a - float. Is only being read.
216
- *
217
- * \returns half
218
- * - \p a converted to half.
219
- * \internal
220
- * \exception-guarantee no-throw guarantee
221
- * \behavior reentrant, thread safe
222
- * \endinternal
223
- */
224
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rd(const float a);
225
- /**
226
- * \ingroup CUDA_MATH__HALF_MISC
227
- * \brief Converts float number to half precision in round-up mode
228
- * and returns \p half with converted value.
229
- *
230
- * \details Converts float number \p a to half precision in round-up mode.
231
- * \param[in] a - float. Is only being read.
232
- *
233
- * \returns half
234
- * - \p a converted to half.
235
- * \internal
236
- * \exception-guarantee no-throw guarantee
237
- * \behavior reentrant, thread safe
238
- * \endinternal
239
- */
240
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_ru(const float a);
241
- /**
242
- * \ingroup CUDA_MATH__HALF_MISC
243
- * \brief Converts \p half number to float.
244
- *
245
- * \details Converts half number \p a to float.
246
- * \param[in] a - half. Is only being read.
247
- *
248
- * \returns float
249
- * - \p a converted to float.
250
- * \internal
251
- * \exception-guarantee no-throw guarantee
252
- * \behavior reentrant, thread safe
253
- * \endinternal
254
- */
255
- __CUDA_HOSTDEVICE_FP16_DECL__ float __half2float(const __half a);
256
- /**
257
- * \ingroup CUDA_MATH__HALF_MISC
258
- * \brief Converts input to half precision in round-to-nearest-even mode and
259
- * populates both halves of \p half2 with converted value.
260
- *
261
- * \details Converts input \p a to half precision in round-to-nearest-even mode and
262
- * populates both halves of \p half2 with converted value.
263
- * \param[in] a - float. Is only being read.
264
- *
265
- * \returns half2
266
- * - The \p half2 value with both halves equal to the converted half
267
- * precision number.
268
- * \internal
269
- * \exception-guarantee no-throw guarantee
270
- * \behavior reentrant, thread safe
271
- * \endinternal
272
- */
273
- __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __float2half2_rn(const float a);
274
- /**
275
- * \ingroup CUDA_MATH__HALF_MISC
276
- * \brief Converts both input floats to half precision in round-to-nearest-even
277
- * mode and returns \p half2 with converted values.
278
- *
279
- * \details Converts both input floats to half precision in round-to-nearest-even mode
280
- * and combines the results into one \p half2 number. Low 16 bits of the return
281
- * value correspond to the input \p a, high 16 bits correspond to the input \p
282
- * b.
283
- * \param[in] a - float. Is only being read.
284
- * \param[in] b - float. Is only being read.
285
- *
286
- * \returns half2
287
- * - The \p half2 value with corresponding halves equal to the
288
- * converted input floats.
289
- * \internal
290
- * \exception-guarantee no-throw guarantee
291
- * \behavior reentrant, thread safe
292
- * \endinternal
293
- */
294
- __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __floats2half2_rn(const float a, const float b);
295
- /**
296
- * \ingroup CUDA_MATH__HALF_MISC
297
- * \brief Converts low 16 bits of \p half2 to float and returns the result
298
- *
299
- * \details Converts low 16 bits of \p half2 input \p a to 32-bit floating-point number
300
- * and returns the result.
301
- * \param[in] a - half2. Is only being read.
302
- *
303
- * \returns float
304
- * - The low 16 bits of \p a converted to float.
305
- * \internal
306
- * \exception-guarantee no-throw guarantee
307
- * \behavior reentrant, thread safe
308
- * \endinternal
309
- */
310
- __CUDA_HOSTDEVICE_FP16_DECL__ float __low2float(const __half2 a);
311
- /**
312
- * \ingroup CUDA_MATH__HALF_MISC
313
- * \brief Converts high 16 bits of \p half2 to float and returns the result
314
- *
315
- * \details Converts high 16 bits of \p half2 input \p a to 32-bit floating-point number
316
- * and returns the result.
317
- * \param[in] a - half2. Is only being read.
318
- *
319
- * \returns float
320
- * - The high 16 bits of \p a converted to float.
321
- * \internal
322
- * \exception-guarantee no-throw guarantee
323
- * \behavior reentrant, thread safe
324
- * \endinternal
325
- */
326
- __CUDA_HOSTDEVICE_FP16_DECL__ float __high2float(const __half2 a);
327
- /**
328
- * \ingroup CUDA_MATH__HALF_MISC
329
- * \brief Convert a half to a signed short integer in round-towards-zero mode.
330
- *
331
- * \details Convert the half-precision floating-point value \p h to a signed short
332
- * integer in round-towards-zero mode. NaN inputs are converted to 0.
333
- * \param[in] h - half. Is only being read.
334
- *
335
- * \returns short int
336
- * - \p h converted to a signed short integer.
337
- * \internal
338
- * \exception-guarantee no-throw guarantee
339
- * \behavior reentrant, thread safe
340
- * \endinternal
341
- */
342
- __CUDA_HOSTDEVICE_FP16_DECL__ short int __half2short_rz(const __half h);
343
- /**
344
- * \ingroup CUDA_MATH__HALF_MISC
345
- * \brief Convert a half to an unsigned short integer in round-towards-zero
346
- * mode.
347
- *
348
- * \details Convert the half-precision floating-point value \p h to an unsigned short
349
- * integer in round-towards-zero mode. NaN inputs are converted to 0.
350
- * \param[in] h - half. Is only being read.
351
- *
352
- * \returns unsigned short int
353
- * - \p h converted to an unsigned short integer.
354
- * \internal
355
- * \exception-guarantee no-throw guarantee
356
- * \behavior reentrant, thread safe
357
- * \endinternal
358
- */
359
- __CUDA_HOSTDEVICE_FP16_DECL__ unsigned short int __half2ushort_rz(const __half h);
360
- /**
361
- * \ingroup CUDA_MATH__HALF_MISC
362
- * \brief Convert a half to a signed integer in round-towards-zero mode.
363
- *
364
- * \details Convert the half-precision floating-point value \p h to a signed integer in
365
- * round-towards-zero mode. NaN inputs are converted to 0.
366
- * \param[in] h - half. Is only being read.
367
- *
368
- * \returns int
369
- * - \p h converted to a signed integer.
370
- * \internal
371
- * \exception-guarantee no-throw guarantee
372
- * \behavior reentrant, thread safe
373
- * \endinternal
374
- */
375
- __CUDA_HOSTDEVICE_FP16_DECL__ int __half2int_rz(const __half h);
376
- /**
377
- * \ingroup CUDA_MATH__HALF_MISC
378
- * \brief Convert a half to an unsigned integer in round-towards-zero mode.
379
- *
380
- * \details Convert the half-precision floating-point value \p h to an unsigned integer
381
- * in round-towards-zero mode. NaN inputs are converted to 0.
382
- * \param[in] h - half. Is only being read.
383
- *
384
- * \returns unsigned int
385
- * - \p h converted to an unsigned integer.
386
- * \internal
387
- * \exception-guarantee no-throw guarantee
388
- * \behavior reentrant, thread safe
389
- * \endinternal
390
- */
391
- __CUDA_HOSTDEVICE_FP16_DECL__ unsigned int __half2uint_rz(const __half h);
392
- /**
393
- * \ingroup CUDA_MATH__HALF_MISC
394
- * \brief Convert a half to a signed 64-bit integer in round-towards-zero mode.
395
- *
396
- * \details Convert the half-precision floating-point value \p h to a signed 64-bit
397
- * integer in round-towards-zero mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
398
- * \param[in] h - half. Is only being read.
399
- *
400
- * \returns long long int
401
- * - \p h converted to a signed 64-bit integer.
402
- * \internal
403
- * \exception-guarantee no-throw guarantee
404
- * \behavior reentrant, thread safe
405
- * \endinternal
406
- */
407
- __CUDA_HOSTDEVICE_FP16_DECL__ long long int __half2ll_rz(const __half h);
408
- /**
409
- * \ingroup CUDA_MATH__HALF_MISC
410
- * \brief Convert a half to an unsigned 64-bit integer in round-towards-zero
411
- * mode.
412
- *
413
- * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
414
- * integer in round-towards-zero mode. NaN inputs return 0x8000000000000000.
415
- * \param[in] h - half. Is only being read.
416
- *
417
- * \returns unsigned long long int
418
- * - \p h converted to an unsigned 64-bit integer.
419
- * \internal
420
- * \exception-guarantee no-throw guarantee
421
- * \behavior reentrant, thread safe
422
- * \endinternal
423
- */
424
- __CUDA_HOSTDEVICE_FP16_DECL__ unsigned long long int __half2ull_rz(const __half h);
425
-
426
- #if defined(__CUDACC__)
427
- /**
428
- * \ingroup CUDA_MATH__HALF_MISC
429
- * \brief Converts both components of float2 number to half precision in
430
- * round-to-nearest-even mode and returns \p half2 with converted values.
431
- *
432
- * \details Converts both components of float2 to half precision in round-to-nearest-even
433
- * mode and combines the results into one \p half2 number. Low 16 bits of the
434
- * return value correspond to \p a.x and high 16 bits of the return value
435
- * correspond to \p a.y.
436
- * \param[in] a - float2. Is only being read.
437
- *
438
- * \returns half2
439
- * - The \p half2 which has corresponding halves equal to the
440
- * converted float2 components.
441
- * \internal
442
- * \exception-guarantee no-throw guarantee
443
- * \behavior reentrant, thread safe
444
- * \endinternal
445
- */
446
- __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __float22half2_rn(const float2 a);
447
- /**
448
- * \ingroup CUDA_MATH__HALF_MISC
449
- * \brief Converts both halves of \p half2 to float2 and returns the result.
450
- *
451
- * \details Converts both halves of \p half2 input \p a to float2 and returns the
452
- * result.
453
- * \param[in] a - half2. Is only being read.
454
- *
455
- * \returns float2
456
- * - \p a converted to float2.
457
- * \internal
458
- * \exception-guarantee no-throw guarantee
459
- * \behavior reentrant, thread safe
460
- * \endinternal
461
- */
462
- __CUDA_HOSTDEVICE_FP16_DECL__ float2 __half22float2(const __half2 a);
463
- /**
464
- * \ingroup CUDA_MATH__HALF_MISC
465
- * \brief Convert a half to a signed integer in round-to-nearest-even mode.
466
- *
467
- * \details Convert the half-precision floating-point value \p h to a signed integer in
468
- * round-to-nearest-even mode. NaN inputs are converted to 0.
469
- * \param[in] h - half. Is only being read.
470
- *
471
- * \returns int
472
- * - \p h converted to a signed integer.
473
- * \internal
474
- * \exception-guarantee no-throw guarantee
475
- * \behavior reentrant, thread safe
476
- * \endinternal
477
- */
478
- __CUDA_FP16_DECL__ int __half2int_rn(const __half h);
479
- /**
480
- * \ingroup CUDA_MATH__HALF_MISC
481
- * \brief Convert a half to a signed integer in round-down mode.
482
- *
483
- * \details Convert the half-precision floating-point value \p h to a signed integer in
484
- * round-down mode. NaN inputs are converted to 0.
485
- * \param[in] h - half. Is only being read.
486
- *
487
- * \returns int
488
- * - \p h converted to a signed integer.
489
- * \internal
490
- * \exception-guarantee no-throw guarantee
491
- * \behavior reentrant, thread safe
492
- * \endinternal
493
- */
494
- __CUDA_FP16_DECL__ int __half2int_rd(const __half h);
495
- /**
496
- * \ingroup CUDA_MATH__HALF_MISC
497
- * \brief Convert a half to a signed integer in round-up mode.
498
- *
499
- * \details Convert the half-precision floating-point value \p h to a signed integer in
500
- * round-up mode. NaN inputs are converted to 0.
501
- * \param[in] h - half. Is only being read.
502
- *
503
- * \returns int
504
- * - \p h converted to a signed integer.
505
- * \internal
506
- * \exception-guarantee no-throw guarantee
507
- * \behavior reentrant, thread safe
508
- * \endinternal
509
- */
510
- __CUDA_FP16_DECL__ int __half2int_ru(const __half h);
511
-
512
- /**
513
- * \ingroup CUDA_MATH__HALF_MISC
514
- * \brief Convert a signed integer to a half in round-to-nearest-even mode.
515
- *
516
- * \details Convert the signed integer value \p i to a half-precision floating-point
517
- * value in round-to-nearest-even mode.
518
- * \param[in] i - int. Is only being read.
519
- *
520
- * \returns half
521
- * - \p i converted to half.
522
- * \internal
523
- * \exception-guarantee no-throw guarantee
524
- * \behavior reentrant, thread safe
525
- * \endinternal
526
- */
527
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __int2half_rn(const int i);
528
- /**
529
- * \ingroup CUDA_MATH__HALF_MISC
530
- * \brief Convert a signed integer to a half in round-towards-zero mode.
531
- *
532
- * \details Convert the signed integer value \p i to a half-precision floating-point
533
- * value in round-towards-zero mode.
534
- * \param[in] i - int. Is only being read.
535
- *
536
- * \returns half
537
- * - \p i converted to half.
538
- * \internal
539
- * \exception-guarantee no-throw guarantee
540
- * \behavior reentrant, thread safe
541
- * \endinternal
542
- */
543
- __CUDA_FP16_DECL__ __half __int2half_rz(const int i);
544
- /**
545
- * \ingroup CUDA_MATH__HALF_MISC
546
- * \brief Convert a signed integer to a half in round-down mode.
547
- *
548
- * \details Convert the signed integer value \p i to a half-precision floating-point
549
- * value in round-down mode.
550
- * \param[in] i - int. Is only being read.
551
- *
552
- * \returns half
553
- * - \p i converted to half.
554
- * \internal
555
- * \exception-guarantee no-throw guarantee
556
- * \behavior reentrant, thread safe
557
- * \endinternal
558
- */
559
- __CUDA_FP16_DECL__ __half __int2half_rd(const int i);
560
- /**
561
- * \ingroup CUDA_MATH__HALF_MISC
562
- * \brief Convert a signed integer to a half in round-up mode.
563
- *
564
- * \details Convert the signed integer value \p i to a half-precision floating-point
565
- * value in round-up mode.
566
- * \param[in] i - int. Is only being read.
567
- *
568
- * \returns half
569
- * - \p i converted to half.
570
- * \internal
571
- * \exception-guarantee no-throw guarantee
572
- * \behavior reentrant, thread safe
573
- * \endinternal
574
- */
575
- __CUDA_FP16_DECL__ __half __int2half_ru(const int i);
576
-
577
- /**
578
- * \ingroup CUDA_MATH__HALF_MISC
579
- * \brief Convert a half to a signed short integer in round-to-nearest-even
580
- * mode.
581
- *
582
- * \details Convert the half-precision floating-point value \p h to a signed short
583
- * integer in round-to-nearest-even mode. NaN inputs are converted to 0.
584
- * \param[in] h - half. Is only being read.
585
- *
586
- * \returns short int
587
- * - \p h converted to a signed short integer.
588
- * \internal
589
- * \exception-guarantee no-throw guarantee
590
- * \behavior reentrant, thread safe
591
- * \endinternal
592
- */
593
- __CUDA_FP16_DECL__ short int __half2short_rn(const __half h);
594
- /**
595
- * \ingroup CUDA_MATH__HALF_MISC
596
- * \brief Convert a half to a signed short integer in round-down mode.
597
- *
598
- * \details Convert the half-precision floating-point value \p h to a signed short
599
- * integer in round-down mode. NaN inputs are converted to 0.
600
- * \param[in] h - half. Is only being read.
601
- *
602
- * \returns short int
603
- * - \p h converted to a signed short integer.
604
- * \internal
605
- * \exception-guarantee no-throw guarantee
606
- * \behavior reentrant, thread safe
607
- * \endinternal
608
- */
609
- __CUDA_FP16_DECL__ short int __half2short_rd(const __half h);
610
- /**
611
- * \ingroup CUDA_MATH__HALF_MISC
612
- * \brief Convert a half to a signed short integer in round-up mode.
613
- *
614
- * \details Convert the half-precision floating-point value \p h to a signed short
615
- * integer in round-up mode. NaN inputs are converted to 0.
616
- * \param[in] h - half. Is only being read.
617
- *
618
- * \returns short int
619
- * - \p h converted to a signed short integer.
620
- * \internal
621
- * \exception-guarantee no-throw guarantee
622
- * \behavior reentrant, thread safe
623
- * \endinternal
624
- */
625
- __CUDA_FP16_DECL__ short int __half2short_ru(const __half h);
626
-
627
- /**
628
- * \ingroup CUDA_MATH__HALF_MISC
629
- * \brief Convert a signed short integer to a half in round-to-nearest-even
630
- * mode.
631
- *
632
- * \details Convert the signed short integer value \p i to a half-precision floating-point
633
- * value in round-to-nearest-even mode.
634
- * \param[in] i - short int. Is only being read.
635
- *
636
- * \returns half
637
- * - \p i converted to half.
638
- * \internal
639
- * \exception-guarantee no-throw guarantee
640
- * \behavior reentrant, thread safe
641
- * \endinternal
642
- */
643
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __short2half_rn(const short int i);
644
- /**
645
- * \ingroup CUDA_MATH__HALF_MISC
646
- * \brief Convert a signed short integer to a half in round-towards-zero mode.
647
- *
648
- * \details Convert the signed short integer value \p i to a half-precision floating-point
649
- * value in round-towards-zero mode.
650
- * \param[in] i - short int. Is only being read.
651
- *
652
- * \returns half
653
- * - \p i converted to half.
654
- * \internal
655
- * \exception-guarantee no-throw guarantee
656
- * \behavior reentrant, thread safe
657
- * \endinternal
658
- */
659
- __CUDA_FP16_DECL__ __half __short2half_rz(const short int i);
660
- /**
661
- * \ingroup CUDA_MATH__HALF_MISC
662
- * \brief Convert a signed short integer to a half in round-down mode.
663
- *
664
- * \details Convert the signed short integer value \p i to a half-precision floating-point
665
- * value in round-down mode.
666
- * \param[in] i - short int. Is only being read.
667
- *
668
- * \returns half
669
- * - \p i converted to half.
670
- * \internal
671
- * \exception-guarantee no-throw guarantee
672
- * \behavior reentrant, thread safe
673
- * \endinternal
674
- */
675
- __CUDA_FP16_DECL__ __half __short2half_rd(const short int i);
676
- /**
677
- * \ingroup CUDA_MATH__HALF_MISC
678
- * \brief Convert a signed short integer to a half in round-up mode.
679
- *
680
- * \details Convert the signed short integer value \p i to a half-precision floating-point
681
- * value in round-up mode.
682
- * \param[in] i - short int. Is only being read.
683
- *
684
- * \returns half
685
- * - \p i converted to half.
686
- * \internal
687
- * \exception-guarantee no-throw guarantee
688
- * \behavior reentrant, thread safe
689
- * \endinternal
690
- */
691
- __CUDA_FP16_DECL__ __half __short2half_ru(const short int i);
692
-
693
- /**
694
- * \ingroup CUDA_MATH__HALF_MISC
695
- * \brief Convert a half to an unsigned integer in round-to-nearest-even mode.
696
- *
697
- * \details Convert the half-precision floating-point value \p h to an unsigned integer
698
- * in round-to-nearest-even mode. NaN inputs are converted to 0.
699
- * \param[in] h - half. Is only being read.
700
- *
701
- * \returns unsigned int
702
- * - \p h converted to an unsigned integer.
703
- * \internal
704
- * \exception-guarantee no-throw guarantee
705
- * \behavior reentrant, thread safe
706
- * \endinternal
707
- */
708
- __CUDA_FP16_DECL__ unsigned int __half2uint_rn(const __half h);
709
- /**
710
- * \ingroup CUDA_MATH__HALF_MISC
711
- * \brief Convert a half to an unsigned integer in round-down mode.
712
- *
713
- * \details Convert the half-precision floating-point value \p h to an unsigned integer
714
- * in round-down mode. NaN inputs are converted to 0.
715
- * \param[in] h - half. Is only being read.
716
- *
717
- * \returns unsigned int
718
- * - \p h converted to an unsigned integer.
719
- * \internal
720
- * \exception-guarantee no-throw guarantee
721
- * \behavior reentrant, thread safe
722
- * \endinternal
723
- */
724
- __CUDA_FP16_DECL__ unsigned int __half2uint_rd(const __half h);
725
- /**
726
- * \ingroup CUDA_MATH__HALF_MISC
727
- * \brief Convert a half to an unsigned integer in round-up mode.
728
- *
729
- * \details Convert the half-precision floating-point value \p h to an unsigned integer
730
- * in round-up mode. NaN inputs are converted to 0.
731
- * \param[in] h - half. Is only being read.
732
- *
733
- * \returns unsigned int
734
- * - \p h converted to an unsigned integer.
735
- * \internal
736
- * \exception-guarantee no-throw guarantee
737
- * \behavior reentrant, thread safe
738
- * \endinternal
739
- */
740
- __CUDA_FP16_DECL__ unsigned int __half2uint_ru(const __half h);
741
-
742
- /**
743
- * \ingroup CUDA_MATH__HALF_MISC
744
- * \brief Convert an unsigned integer to a half in round-to-nearest-even mode.
745
- *
746
- * \details Convert the unsigned integer value \p i to a half-precision floating-point
747
- * value in round-to-nearest-even mode.
748
- * \param[in] i - unsigned int. Is only being read.
749
- *
750
- * \returns half
751
- * - \p i converted to half.
752
- * \internal
753
- * \exception-guarantee no-throw guarantee
754
- * \behavior reentrant, thread safe
755
- * \endinternal
756
- */
757
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __uint2half_rn(const unsigned int i);
758
- /**
759
- * \ingroup CUDA_MATH__HALF_MISC
760
- * \brief Convert an unsigned integer to a half in round-towards-zero mode.
761
- *
762
- * \details Convert the unsigned integer value \p i to a half-precision floating-point
763
- * value in round-towards-zero mode.
764
- * \param[in] i - unsigned int. Is only being read.
765
- *
766
- * \returns half
767
- * - \p i converted to half.
768
- * \internal
769
- * \exception-guarantee no-throw guarantee
770
- * \behavior reentrant, thread safe
771
- * \endinternal
772
- */
773
- __CUDA_FP16_DECL__ __half __uint2half_rz(const unsigned int i);
774
- /**
775
- * \ingroup CUDA_MATH__HALF_MISC
776
- * \brief Convert an unsigned integer to a half in round-down mode.
777
- *
778
- * \details Convert the unsigned integer value \p i to a half-precision floating-point
779
- * value in round-down mode.
780
- * \param[in] i - unsigned int. Is only being read.
781
- *
782
- * \returns half
783
- * - \p i converted to half.
784
- * \internal
785
- * \exception-guarantee no-throw guarantee
786
- * \behavior reentrant, thread safe
787
- * \endinternal
788
- */
789
- __CUDA_FP16_DECL__ __half __uint2half_rd(const unsigned int i);
790
- /**
791
- * \ingroup CUDA_MATH__HALF_MISC
792
- * \brief Convert an unsigned integer to a half in round-up mode.
793
- *
794
- * \details Convert the unsigned integer value \p i to a half-precision floating-point
795
- * value in round-up mode.
796
- * \param[in] i - unsigned int. Is only being read.
797
- *
798
- * \returns half
799
- * - \p i converted to half.
800
- * \internal
801
- * \exception-guarantee no-throw guarantee
802
- * \behavior reentrant, thread safe
803
- * \endinternal
804
- */
805
- __CUDA_FP16_DECL__ __half __uint2half_ru(const unsigned int i);
806
-
807
- /**
808
- * \ingroup CUDA_MATH__HALF_MISC
809
- * \brief Convert a half to an unsigned short integer in round-to-nearest-even
810
- * mode.
811
- *
812
- * \details Convert the half-precision floating-point value \p h to an unsigned short
813
- * integer in round-to-nearest-even mode. NaN inputs are converted to 0.
814
- * \param[in] h - half. Is only being read.
815
- *
816
- * \returns unsigned short int
817
- * - \p h converted to an unsigned short integer.
818
- * \internal
819
- * \exception-guarantee no-throw guarantee
820
- * \behavior reentrant, thread safe
821
- * \endinternal
822
- */
823
- __CUDA_FP16_DECL__ unsigned short int __half2ushort_rn(const __half h);
824
- /**
825
- * \ingroup CUDA_MATH__HALF_MISC
826
- * \brief Convert a half to an unsigned short integer in round-down mode.
827
- *
828
- * \details Convert the half-precision floating-point value \p h to an unsigned short
829
- * integer in round-down mode. NaN inputs are converted to 0.
830
- * \param[in] h - half. Is only being read.
831
- *
832
- * \returns unsigned short int
833
- * - \p h converted to an unsigned short integer.
834
- */
835
- __CUDA_FP16_DECL__ unsigned short int __half2ushort_rd(const __half h);
836
- /**
837
- * \ingroup CUDA_MATH__HALF_MISC
838
- * \brief Convert a half to an unsigned short integer in round-up mode.
839
- *
840
- * \details Convert the half-precision floating-point value \p h to an unsigned short
841
- * integer in round-up mode. NaN inputs are converted to 0.
842
- * \param[in] h - half. Is only being read.
843
- *
844
- * \returns unsigned short int
845
- * - \p h converted to an unsigned short integer.
846
- */
847
- __CUDA_FP16_DECL__ unsigned short int __half2ushort_ru(const __half h);
848
-
849
- /**
850
- * \ingroup CUDA_MATH__HALF_MISC
851
- * \brief Convert an unsigned short integer to a half in round-to-nearest-even
852
- * mode.
853
- *
854
- * \details Convert the unsigned short integer value \p i to a half-precision floating-point
855
- * value in round-to-nearest-even mode.
856
- * \param[in] i - unsigned short int. Is only being read.
857
- *
858
- * \returns half
859
- * - \p i converted to half.
860
- * \internal
861
- * \exception-guarantee no-throw guarantee
862
- * \behavior reentrant, thread safe
863
- * \endinternal
864
- */
865
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __ushort2half_rn(const unsigned short int i);
866
- /**
867
- * \ingroup CUDA_MATH__HALF_MISC
868
- * \brief Convert an unsigned short integer to a half in round-towards-zero
869
- * mode.
870
- *
871
- * \details Convert the unsigned short integer value \p i to a half-precision floating-point
872
- * value in round-towards-zero mode.
873
- * \param[in] i - unsigned short int. Is only being read.
874
- *
875
- * \returns half
876
- * - \p i converted to half.
877
- * \internal
878
- * \exception-guarantee no-throw guarantee
879
- * \behavior reentrant, thread safe
880
- * \endinternal
881
- */
882
- __CUDA_FP16_DECL__ __half __ushort2half_rz(const unsigned short int i);
883
- /**
884
- * \ingroup CUDA_MATH__HALF_MISC
885
- * \brief Convert an unsigned short integer to a half in round-down mode.
886
- *
887
- * \details Convert the unsigned short integer value \p i to a half-precision floating-point
888
- * value in round-down mode.
889
- * \param[in] i - unsigned short int. Is only being read.
890
- *
891
- * \returns half
892
- * - \p i converted to half.
893
- * \internal
894
- * \exception-guarantee no-throw guarantee
895
- * \behavior reentrant, thread safe
896
- * \endinternal
897
- */
898
- __CUDA_FP16_DECL__ __half __ushort2half_rd(const unsigned short int i);
899
- /**
900
- * \ingroup CUDA_MATH__HALF_MISC
901
- * \brief Convert an unsigned short integer to a half in round-up mode.
902
- *
903
- * \details Convert the unsigned short integer value \p i to a half-precision floating-point
904
- * value in round-up mode.
905
- * \param[in] i - unsigned short int. Is only being read.
906
- *
907
- * \returns half
908
- * - \p i converted to half.
909
- * \internal
910
- * \exception-guarantee no-throw guarantee
911
- * \behavior reentrant, thread safe
912
- * \endinternal
913
- */
914
- __CUDA_FP16_DECL__ __half __ushort2half_ru(const unsigned short int i);
915
-
916
- /**
917
- * \ingroup CUDA_MATH__HALF_MISC
918
- * \brief Convert a half to an unsigned 64-bit integer in round-to-nearest-even
919
- * mode.
920
- *
921
- * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
922
- * integer in round-to-nearest-even mode. NaN inputs return 0x8000000000000000.
923
- * \param[in] h - half. Is only being read.
924
- *
925
- * \returns unsigned long long int
926
- * - \p h converted to an unsigned 64-bit integer.
927
- * \internal
928
- * \exception-guarantee no-throw guarantee
929
- * \behavior reentrant, thread safe
930
- * \endinternal
931
- */
932
- __CUDA_FP16_DECL__ unsigned long long int __half2ull_rn(const __half h);
933
- /**
934
- * \ingroup CUDA_MATH__HALF_MISC
935
- * \brief Convert a half to an unsigned 64-bit integer in round-down mode.
936
- *
937
- * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
938
- * integer in round-down mode. NaN inputs return 0x8000000000000000.
939
- * \param[in] h - half. Is only being read.
940
- *
941
- * \returns unsigned long long int
942
- * - \p h converted to an unsigned 64-bit integer.
943
- * \internal
944
- * \exception-guarantee no-throw guarantee
945
- * \behavior reentrant, thread safe
946
- * \endinternal
947
- */
948
- __CUDA_FP16_DECL__ unsigned long long int __half2ull_rd(const __half h);
949
- /**
950
- * \ingroup CUDA_MATH__HALF_MISC
951
- * \brief Convert a half to an unsigned 64-bit integer in round-up mode.
952
- *
953
- * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
954
- * integer in round-up mode. NaN inputs return 0x8000000000000000.
955
- * \param[in] h - half. Is only being read.
956
- *
957
- * \returns unsigned long long int
958
- * - \p h converted to an unsigned 64-bit integer.
959
- * \internal
960
- * \exception-guarantee no-throw guarantee
961
- * \behavior reentrant, thread safe
962
- * \endinternal
963
- */
964
- __CUDA_FP16_DECL__ unsigned long long int __half2ull_ru(const __half h);
965
-
966
- /**
967
- * \ingroup CUDA_MATH__HALF_MISC
968
- * \brief Convert an unsigned 64-bit integer to a half in round-to-nearest-even
969
- * mode.
970
- *
971
- * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
972
- * value in round-to-nearest-even mode.
973
- * \param[in] i - unsigned long long int. Is only being read.
974
- *
975
- * \returns half
976
- * - \p i converted to half.
977
- * \internal
978
- * \exception-guarantee no-throw guarantee
979
- * \behavior reentrant, thread safe
980
- * \endinternal
981
- */
982
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __ull2half_rn(const unsigned long long int i);
983
- /**
984
- * \ingroup CUDA_MATH__HALF_MISC
985
- * \brief Convert an unsigned 64-bit integer to a half in round-towards-zero
986
- * mode.
987
- *
988
- * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
989
- * value in round-towards-zero mode.
990
- * \param[in] i - unsigned long long int. Is only being read.
991
- *
992
- * \returns half
993
- * - \p i converted to half.
994
- * \internal
995
- * \exception-guarantee no-throw guarantee
996
- * \behavior reentrant, thread safe
997
- * \endinternal
998
- */
999
- __CUDA_FP16_DECL__ __half __ull2half_rz(const unsigned long long int i);
1000
- /**
1001
- * \ingroup CUDA_MATH__HALF_MISC
1002
- * \brief Convert an unsigned 64-bit integer to a half in round-down mode.
1003
- *
1004
- * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
1005
- * value in round-down mode.
1006
- * \param[in] i - unsigned long long int. Is only being read.
1007
- *
1008
- * \returns half
1009
- * - \p i converted to half.
1010
- * \internal
1011
- * \exception-guarantee no-throw guarantee
1012
- * \behavior reentrant, thread safe
1013
- * \endinternal
1014
- */
1015
- __CUDA_FP16_DECL__ __half __ull2half_rd(const unsigned long long int i);
1016
- /**
1017
- * \ingroup CUDA_MATH__HALF_MISC
1018
- * \brief Convert an unsigned 64-bit integer to a half in round-up mode.
1019
- *
1020
- * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
1021
- * value in round-up mode.
1022
- * \param[in] i - unsigned long long int. Is only being read.
1023
- *
1024
- * \returns half
1025
- * - \p i converted to half.
1026
- * \internal
1027
- * \exception-guarantee no-throw guarantee
1028
- * \behavior reentrant, thread safe
1029
- * \endinternal
1030
- */
1031
- __CUDA_FP16_DECL__ __half __ull2half_ru(const unsigned long long int i);
1032
-
1033
- /**
1034
- * \ingroup CUDA_MATH__HALF_MISC
1035
- * \brief Convert a half to a signed 64-bit integer in round-to-nearest-even
1036
- * mode.
1037
- *
1038
- * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1039
- * integer in round-to-nearest-even mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
1040
- * \param[in] h - half. Is only being read.
1041
- *
1042
- * \returns long long int
1043
- * - \p h converted to a signed 64-bit integer.
1044
- * \internal
1045
- * \exception-guarantee no-throw guarantee
1046
- * \behavior reentrant, thread safe
1047
- * \endinternal
1048
- */
1049
- __CUDA_FP16_DECL__ long long int __half2ll_rn(const __half h);
1050
- /**
1051
- * \ingroup CUDA_MATH__HALF_MISC
1052
- * \brief Convert a half to a signed 64-bit integer in round-down mode.
1053
- *
1054
- * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1055
- * integer in round-down mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
1056
- * \param[in] h - half. Is only being read.
1057
- *
1058
- * \returns long long int
1059
- * - \p h converted to a signed 64-bit integer.
1060
- * \internal
1061
- * \exception-guarantee no-throw guarantee
1062
- * \behavior reentrant, thread safe
1063
- * \endinternal
1064
- */
1065
- __CUDA_FP16_DECL__ long long int __half2ll_rd(const __half h);
1066
- /**
1067
- * \ingroup CUDA_MATH__HALF_MISC
1068
- * \brief Convert a half to a signed 64-bit integer in round-up mode.
1069
- *
1070
- * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1071
- * integer in round-up mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
1072
- * \param[in] h - half. Is only being read.
1073
- *
1074
- * \returns long long int
1075
- * - \p h converted to a signed 64-bit integer.
1076
- * \internal
1077
- * \exception-guarantee no-throw guarantee
1078
- * \behavior reentrant, thread safe
1079
- * \endinternal
1080
- */
1081
- __CUDA_FP16_DECL__ long long int __half2ll_ru(const __half h);
1082
-
1083
- /**
1084
- * \ingroup CUDA_MATH__HALF_MISC
1085
- * \brief Convert a signed 64-bit integer to a half in round-to-nearest-even
1086
- * mode.
1087
- *
1088
- * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1089
- * value in round-to-nearest-even mode.
1090
- * \param[in] i - long long int. Is only being read.
1091
- *
1092
- * \returns half
1093
- * - \p i converted to half.
1094
- * \internal
1095
- * \exception-guarantee no-throw guarantee
1096
- * \behavior reentrant, thread safe
1097
- * \endinternal
1098
- */
1099
- __CUDA_HOSTDEVICE_FP16_DECL__ __half __ll2half_rn(const long long int i);
1100
- /**
1101
- * \ingroup CUDA_MATH__HALF_MISC
1102
- * \brief Convert a signed 64-bit integer to a half in round-towards-zero mode.
1103
- *
1104
- * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1105
- * value in round-towards-zero mode.
1106
- * \param[in] i - long long int. Is only being read.
1107
- *
1108
- * \returns half
1109
- * - \p i converted to half.
1110
- */
1111
- __CUDA_FP16_DECL__ __half __ll2half_rz(const long long int i);
1112
- /**
1113
- * \ingroup CUDA_MATH__HALF_MISC
1114
- * \brief Convert a signed 64-bit integer to a half in round-down mode.
1115
- *
1116
- * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1117
- * value in round-down mode.
1118
- * \param[in] i - long long int. Is only being read.
1119
- *
1120
- * \returns half
1121
- * - \p i converted to half.
1122
- * \internal
1123
- * \exception-guarantee no-throw guarantee
1124
- * \behavior reentrant, thread safe
1125
- * \endinternal
1126
- */
1127
- __CUDA_FP16_DECL__ __half __ll2half_rd(const long long int i);
1128
- /**
1129
- * \ingroup CUDA_MATH__HALF_MISC
1130
- * \brief Convert a signed 64-bit integer to a half in round-up mode.
1131
- *
1132
- * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1133
- * value in round-up mode.
1134
- * \param[in] i - long long int. Is only being read.
1135
- *
1136
- * \returns half
1137
- * - \p i converted to half.
1138
- * \internal
1139
- * \exception-guarantee no-throw guarantee
1140
- * \behavior reentrant, thread safe
1141
- * \endinternal
1142
- */
1143
- __CUDA_FP16_DECL__ __half __ll2half_ru(const long long int i);
1144
-
1145
- /**
1146
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
1147
- * \brief Truncate input argument to the integral part.
1148
- *
1149
- * \details Round \p h to the nearest integer value that does not exceed \p h in
1150
- * magnitude.
1151
- * \param[in] h - half. Is only being read.
1152
- *
1153
- * \returns half
1154
- * - The truncated integer value.
1155
- * \internal
1156
- * \exception-guarantee no-throw guarantee
1157
- * \behavior reentrant, thread safe
1158
- * \endinternal
1159
- */
1160
- __CUDA_FP16_DECL__ __half htrunc(const __half h);
1161
- /**
1162
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
1163
- * \brief Calculate ceiling of the input argument.
1164
- *
1165
- * \details Compute the smallest integer value not less than \p h.
1166
- * \param[in] h - half. Is only being read.
1167
- *
1168
- * \returns half
1169
- * - The smallest integer value not less than \p h.
1170
- * \internal
1171
- * \exception-guarantee no-throw guarantee
1172
- * \behavior reentrant, thread safe
1173
- * \endinternal
1174
- */
1175
- __CUDA_FP16_DECL__ __half hceil(const __half h);
1176
- /**
1177
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
1178
- * \brief Calculate the largest integer less than or equal to \p h.
1179
- *
1180
- * \details Calculate the largest integer value which is less than or equal to \p h.
1181
- * \param[in] h - half. Is only being read.
1182
- *
1183
- * \returns half
1184
- * - The largest integer value which is less than or equal to \p h.
1185
- * \internal
1186
- * \exception-guarantee no-throw guarantee
1187
- * \behavior reentrant, thread safe
1188
- * \endinternal
1189
- */
1190
- __CUDA_FP16_DECL__ __half hfloor(const __half h);
1191
- /**
1192
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
1193
- * \brief Round input to nearest integer value in half-precision floating-point
1194
- * number.
1195
- *
1196
- * \details Round \p h to the nearest integer value in half-precision floating-point
1197
- * format, with halfway cases rounded to the nearest even integer value.
1198
- * \param[in] h - half. Is only being read.
1199
- *
1200
- * \returns half
1201
- * - The nearest integer to \p h.
1202
- * \internal
1203
- * \exception-guarantee no-throw guarantee
1204
- * \behavior reentrant, thread safe
1205
- * \endinternal
1206
- */
1207
- __CUDA_FP16_DECL__ __half hrint(const __half h);
1208
-
1209
- /**
1210
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1211
- * \brief Truncate \p half2 vector input argument to the integral part.
1212
- *
1213
- * \details Round each component of vector \p h to the nearest integer value that does
1214
- * not exceed \p h in magnitude.
1215
- * \param[in] h - half2. Is only being read.
1216
- *
1217
- * \returns half2
1218
- * - The truncated \p h.
1219
- * \internal
1220
- * \exception-guarantee no-throw guarantee
1221
- * \behavior reentrant, thread safe
1222
- * \endinternal
1223
- */
1224
- __CUDA_FP16_DECL__ __half2 h2trunc(const __half2 h);
1225
- /**
1226
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1227
- * \brief Calculate \p half2 vector ceiling of the input argument.
1228
- *
1229
- * \details For each component of vector \p h compute the smallest integer value not less
1230
- * than \p h.
1231
- * \param[in] h - half2. Is only being read.
1232
- *
1233
- * \returns half2
1234
- * - The vector of smallest integers not less than \p h.
1235
- * \internal
1236
- * \exception-guarantee no-throw guarantee
1237
- * \behavior reentrant, thread safe
1238
- * \endinternal
1239
- */
1240
- __CUDA_FP16_DECL__ __half2 h2ceil(const __half2 h);
1241
- /**
1242
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1243
- * \brief Calculate the largest integer less than or equal to \p h.
1244
- *
1245
- * \details For each component of vector \p h calculate the largest integer value which
1246
- * is less than or equal to \p h.
1247
- * \param[in] h - half2. Is only being read.
1248
- *
1249
- * \returns half2
1250
- * - The vector of largest integers which is less than or equal to \p h.
1251
- * \internal
1252
- * \exception-guarantee no-throw guarantee
1253
- * \behavior reentrant, thread safe
1254
- * \endinternal
1255
- */
1256
- __CUDA_FP16_DECL__ __half2 h2floor(const __half2 h);
1257
- /**
1258
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1259
- * \brief Round input to nearest integer value in half-precision floating-point
1260
- * number.
1261
- *
1262
- * \details Round each component of \p half2 vector \p h to the nearest integer value in
1263
- * half-precision floating-point format, with halfway cases rounded to the
1264
- * nearest even integer value.
1265
- * \param[in] h - half2. Is only being read.
1266
- *
1267
- * \returns half2
1268
- * - The vector of rounded integer values.
1269
- * \internal
1270
- * \exception-guarantee no-throw guarantee
1271
- * \behavior reentrant, thread safe
1272
- * \endinternal
1273
- */
1274
- __CUDA_FP16_DECL__ __half2 h2rint(const __half2 h);
1275
-
1276
- /**
1277
- * \ingroup CUDA_MATH__HALF_MISC
1278
- * \brief Returns \p half2 with both halves equal to the input value.
1279
- *
1280
- * \details Returns \p half2 number with both halves equal to the input \p a \p half
1281
- * number.
1282
- * \param[in] a - half. Is only being read.
1283
- *
1284
- * \returns half2
1285
- * - The vector which has both its halves equal to the input \p a.
1286
- * \internal
1287
- * \exception-guarantee no-throw guarantee
1288
- * \behavior reentrant, thread safe
1289
- * \endinternal
1290
- */
1291
- __CUDA_FP16_DECL__ __half2 __half2half2(const __half a);
1292
- /**
1293
- * \ingroup CUDA_MATH__HALF_MISC
1294
- * \brief Swaps both halves of the \p half2 input.
1295
- *
1296
- * \details Swaps both halves of the \p half2 input and returns a new \p half2 number
1297
- * with swapped halves.
1298
- * \param[in] a - half2. Is only being read.
1299
- *
1300
- * \returns half2
1301
- * - \p a with its halves being swapped.
1302
- * \internal
1303
- * \exception-guarantee no-throw guarantee
1304
- * \behavior reentrant, thread safe
1305
- * \endinternal
1306
- */
1307
- __CUDA_FP16_DECL__ __half2 __lowhigh2highlow(const __half2 a);
1308
- /**
1309
- * \ingroup CUDA_MATH__HALF_MISC
1310
- * \brief Extracts low 16 bits from each of the two \p half2 inputs and combines
1311
- * into one \p half2 number.
1312
- *
1313
- * \details Extracts low 16 bits from each of the two \p half2 inputs and combines into
1314
- * one \p half2 number. Low 16 bits from input \p a is stored in low 16 bits of
1315
- * the return value, low 16 bits from input \p b is stored in high 16 bits of
1316
- * the return value.
1317
- * \param[in] a - half2. Is only being read.
1318
- * \param[in] b - half2. Is only being read.
1319
- *
1320
- * \returns half2
1321
- * - The low 16 bits of \p a and of \p b.
1322
- * \internal
1323
- * \exception-guarantee no-throw guarantee
1324
- * \behavior reentrant, thread safe
1325
- * \endinternal
1326
- */
1327
- __CUDA_FP16_DECL__ __half2 __lows2half2(const __half2 a, const __half2 b);
1328
- /**
1329
- * \ingroup CUDA_MATH__HALF_MISC
1330
- * \brief Extracts high 16 bits from each of the two \p half2 inputs and
1331
- * combines into one \p half2 number.
1332
- *
1333
- * \details Extracts high 16 bits from each of the two \p half2 inputs and combines into
1334
- * one \p half2 number. High 16 bits from input \p a is stored in low 16 bits of
1335
- * the return value, high 16 bits from input \p b is stored in high 16 bits of
1336
- * the return value.
1337
- * \param[in] a - half2. Is only being read.
1338
- * \param[in] b - half2. Is only being read.
1339
- *
1340
- * \returns half2
1341
- * - The high 16 bits of \p a and of \p b.
1342
- * \internal
1343
- * \exception-guarantee no-throw guarantee
1344
- * \behavior reentrant, thread safe
1345
- * \endinternal
1346
- */
1347
- __CUDA_FP16_DECL__ __half2 __highs2half2(const __half2 a, const __half2 b);
1348
- /**
1349
- * \ingroup CUDA_MATH__HALF_MISC
1350
- * \brief Returns high 16 bits of \p half2 input.
1351
- *
1352
- * \details Returns high 16 bits of \p half2 input \p a.
1353
- * \param[in] a - half2. Is only being read.
1354
- *
1355
- * \returns half
1356
- * - The high 16 bits of the input.
1357
- * \internal
1358
- * \exception-guarantee no-throw guarantee
1359
- * \behavior reentrant, thread safe
1360
- * \endinternal
1361
- */
1362
- __CUDA_FP16_DECL__ __half __high2half(const __half2 a);
1363
- /**
1364
- * \ingroup CUDA_MATH__HALF_MISC
1365
- * \brief Returns low 16 bits of \p half2 input.
1366
- *
1367
- * \details Returns low 16 bits of \p half2 input \p a.
1368
- * \param[in] a - half2. Is only being read.
1369
- *
1370
- * \returns half
1371
- * - Returns \p half which contains low 16 bits of the input \p a.
1372
- * \internal
1373
- * \exception-guarantee no-throw guarantee
1374
- * \behavior reentrant, thread safe
1375
- * \endinternal
1376
- */
1377
- __CUDA_FP16_DECL__ __half __low2half(const __half2 a);
1378
- /**
1379
- * \ingroup CUDA_MATH__HALF_COMPARISON
1380
- * \brief Checks if the input \p half number is infinite.
1381
- *
1382
- * \details Checks if the input \p half number \p a is infinite.
1383
- * \param[in] a - half. Is only being read.
1384
- *
1385
- * \returns int
1386
- * - -1 iff \p a is equal to negative infinity,
1387
- * - 1 iff \p a is equal to positive infinity,
1388
- * - 0 otherwise.
1389
- * \internal
1390
- * \exception-guarantee no-throw guarantee
1391
- * \behavior reentrant, thread safe
1392
- * \endinternal
1393
- */
1394
- __CUDA_FP16_DECL__ int __hisinf(const __half a);
1395
- /**
1396
- * \ingroup CUDA_MATH__HALF_MISC
1397
- * \brief Combines two \p half numbers into one \p half2 number.
1398
- *
1399
- * \details Combines two input \p half number \p a and \p b into one \p half2 number.
1400
- * Input \p a is stored in low 16 bits of the return value, input \p b is stored
1401
- * in high 16 bits of the return value.
1402
- * \param[in] a - half. Is only being read.
1403
- * \param[in] b - half. Is only being read.
1404
- *
1405
- * \returns half2
1406
- * - The half2 with one half equal to \p a and the other to \p b.
1407
- * \internal
1408
- * \exception-guarantee no-throw guarantee
1409
- * \behavior reentrant, thread safe
1410
- * \endinternal
1411
- */
1412
- __CUDA_FP16_DECL__ __half2 __halves2half2(const __half a, const __half b);
1413
- /**
1414
- * \ingroup CUDA_MATH__HALF_MISC
1415
- * \brief Extracts low 16 bits from \p half2 input.
1416
- *
1417
- * \details Extracts low 16 bits from \p half2 input \p a and returns a new \p half2
1418
- * number which has both halves equal to the extracted bits.
1419
- * \param[in] a - half2. Is only being read.
1420
- *
1421
- * \returns half2
1422
- * - The half2 with both halves equal to the low 16 bits of the input.
1423
- * \internal
1424
- * \exception-guarantee no-throw guarantee
1425
- * \behavior reentrant, thread safe
1426
- * \endinternal
1427
- */
1428
- __CUDA_FP16_DECL__ __half2 __low2half2(const __half2 a);
1429
- /**
1430
- * \ingroup CUDA_MATH__HALF_MISC
1431
- * \brief Extracts high 16 bits from \p half2 input.
1432
- *
1433
- * \details Extracts high 16 bits from \p half2 input \p a and returns a new \p half2
1434
- * number which has both halves equal to the extracted bits.
1435
- * \param[in] a - half2. Is only being read.
1436
- *
1437
- * \returns half2
1438
- * - The half2 with both halves equal to the high 16 bits of the input.
1439
- * \internal
1440
- * \exception-guarantee no-throw guarantee
1441
- * \behavior reentrant, thread safe
1442
- * \endinternal
1443
- */
1444
- __CUDA_FP16_DECL__ __half2 __high2half2(const __half2 a);
1445
-
1446
- /**
1447
- * \ingroup CUDA_MATH__HALF_MISC
1448
- * \brief Reinterprets bits in a \p half as a signed short integer.
1449
- *
1450
- * \details Reinterprets the bits in the half-precision floating-point number \p h
1451
- * as a signed short integer.
1452
- * \param[in] h - half. Is only being read.
1453
- *
1454
- * \returns short int
1455
- * - The reinterpreted value.
1456
- * \internal
1457
- * \exception-guarantee no-throw guarantee
1458
- * \behavior reentrant, thread safe
1459
- * \endinternal
1460
- */
1461
- __CUDA_FP16_DECL__ short int __half_as_short(const __half h);
1462
- /**
1463
- * \ingroup CUDA_MATH__HALF_MISC
1464
- * \brief Reinterprets bits in a \p half as an unsigned short integer.
1465
- *
1466
- * \details Reinterprets the bits in the half-precision floating-point \p h
1467
- * as an unsigned short number.
1468
- * \param[in] h - half. Is only being read.
1469
- *
1470
- * \returns unsigned short int
1471
- * - The reinterpreted value.
1472
- * \internal
1473
- * \exception-guarantee no-throw guarantee
1474
- * \behavior reentrant, thread safe
1475
- * \endinternal
1476
- */
1477
- __CUDA_FP16_DECL__ unsigned short int __half_as_ushort(const __half h);
1478
- /**
1479
- * \ingroup CUDA_MATH__HALF_MISC
1480
- * \brief Reinterprets bits in a signed short integer as a \p half.
1481
- *
1482
- * \details Reinterprets the bits in the signed short integer \p i as a
1483
- * half-precision floating-point number.
1484
- * \param[in] i - short int. Is only being read.
1485
- *
1486
- * \returns half
1487
- * - The reinterpreted value.
1488
- * \internal
1489
- * \exception-guarantee no-throw guarantee
1490
- * \behavior reentrant, thread safe
1491
- * \endinternal
1492
- */
1493
- __CUDA_FP16_DECL__ __half __short_as_half(const short int i);
1494
- /**
1495
- * \ingroup CUDA_MATH__HALF_MISC
1496
- * \brief Reinterprets bits in an unsigned short integer as a \p half.
1497
- *
1498
- * \details Reinterprets the bits in the unsigned short integer \p i as a
1499
- * half-precision floating-point number.
1500
- * \param[in] i - unsigned short int. Is only being read.
1501
- *
1502
- * \returns half
1503
- * - The reinterpreted value.
1504
- * \internal
1505
- * \exception-guarantee no-throw guarantee
1506
- * \behavior reentrant, thread safe
1507
- * \endinternal
1508
- */
1509
- __CUDA_FP16_DECL__ __half __ushort_as_half(const unsigned short int i);
1510
- /**
1511
- * \ingroup CUDA_MATH__HALF_COMPARISON
1512
- * \brief Calculates \p half maximum of two input values.
1513
- *
1514
- * \details Calculates \p half max(\p a, \p b)
1515
- * defined as (\p a > \p b) ? \p a : \p b.
1516
- * - If either of inputs is NaN, the other input is returned.
1517
- * - If both inputs are NaNs, then canonical NaN is returned.
1518
- * - If values of both inputs are 0.0, then +0.0 > -0.0
1519
- * \param[in] a - half. Is only being read.
1520
- * \param[in] b - half. Is only being read.
1521
- *
1522
- * \returns half
1523
- * \internal
1524
- * \exception-guarantee no-throw guarantee
1525
- * \behavior reentrant, thread safe
1526
- * \endinternal
1527
- */
1528
- __CUDA_FP16_DECL__ __half __hmax(const __half a, const __half b);
1529
- /**
1530
- * \ingroup CUDA_MATH__HALF_COMPARISON
1531
- * \brief Calculates \p half minimum of two input values.
1532
- *
1533
- * \details Calculates \p half min(\p a, \p b)
1534
- * defined as (\p a < \p b) ? \p a : \p b.
1535
- * - If either of inputs is NaN, the other input is returned.
1536
- * - If both inputs are NaNs, then canonical NaN is returned.
1537
- * - If values of both inputs are 0.0, then +0.0 > -0.0
1538
- * \param[in] a - half. Is only being read.
1539
- * \param[in] b - half. Is only being read.
1540
- *
1541
- * \returns half
1542
- * \internal
1543
- * \exception-guarantee no-throw guarantee
1544
- * \behavior reentrant, thread safe
1545
- * \endinternal
1546
- */
1547
- __CUDA_FP16_DECL__ __half __hmin(const __half a, const __half b);
1548
- /**
1549
- * \ingroup CUDA_MATH__HALF2_COMPARISON
1550
- * \brief Calculates \p half2 vector maximum of two inputs.
1551
- *
1552
- * \details Calculates \p half2 vector max(\p a, \p b).
1553
- * Elementwise \p half operation is defined as
1554
- * (\p a > \p b) ? \p a : \p b.
1555
- * - If either of inputs is NaN, the other input is returned.
1556
- * - If both inputs are NaNs, then canonical NaN is returned.
1557
- * - If values of both inputs are 0.0, then +0.0 > -0.0
1558
- * \param[in] a - half2. Is only being read.
1559
- * \param[in] b - half2. Is only being read.
1560
- *
1561
- * \returns half2
1562
- * - The result of elementwise maximum of vectors \p a and \p b
1563
- * \internal
1564
- * \exception-guarantee no-throw guarantee
1565
- * \behavior reentrant, thread safe
1566
- * \endinternal
1567
- */
1568
- __CUDA_FP16_DECL__ __half2 __hmax2(const __half2 a, const __half2 b);
1569
- /**
1570
- * \ingroup CUDA_MATH__HALF2_COMPARISON
1571
- * \brief Calculates \p half2 vector minimum of two inputs.
1572
- *
1573
- * \details Calculates \p half2 vector min(\p a, \p b).
1574
- * Elementwise \p half operation is defined as
1575
- * (\p a < \p b) ? \p a : \p b.
1576
- * - If either of inputs is NaN, the other input is returned.
1577
- * - If both inputs are NaNs, then canonical NaN is returned.
1578
- * - If values of both inputs are 0.0, then +0.0 > -0.0
1579
- * \param[in] a - half2. Is only being read.
1580
- * \param[in] b - half2. Is only being read.
1581
- *
1582
- * \returns half2
1583
- * - The result of elementwise minimum of vectors \p a and \p b
1584
- * \internal
1585
- * \exception-guarantee no-throw guarantee
1586
- * \behavior reentrant, thread safe
1587
- * \endinternal
1588
- */
1589
- __CUDA_FP16_DECL__ __half2 __hmin2(const __half2 a, const __half2 b);
1590
-
1591
- #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 300)
1592
- #if !defined warpSize && !defined __local_warpSize
1593
- #define warpSize 32
1594
- #define __local_warpSize
1595
- #endif
1596
-
1597
- #if defined(_WIN32)
1598
- # define __DEPRECATED__(msg) __declspec(deprecated(msg))
1599
- #elif (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5 && !defined(__clang__))))
1600
- # define __DEPRECATED__(msg) __attribute__((deprecated))
1601
- #else
1602
- # define __DEPRECATED__(msg) __attribute__((deprecated(msg)))
1603
- #endif
1604
-
1605
- #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 700
1606
- #define __WSB_DEPRECATION_MESSAGE(x) __CUDA_FP16_STRINGIFY(x) "() is deprecated in favor of " __CUDA_FP16_STRINGIFY(x) "_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."
1607
-
1608
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl)) __half2 __shfl(const __half2 var, const int delta, const int width = warpSize);
1609
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_up)) __half2 __shfl_up(const __half2 var, const unsigned int delta, const int width = warpSize);
1610
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_down))__half2 __shfl_down(const __half2 var, const unsigned int delta, const int width = warpSize);
1611
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_xor)) __half2 __shfl_xor(const __half2 var, const int delta, const int width = warpSize);
1612
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl)) __half __shfl(const __half var, const int delta, const int width = warpSize);
1613
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_up)) __half __shfl_up(const __half var, const unsigned int delta, const int width = warpSize);
1614
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_down)) __half __shfl_down(const __half var, const unsigned int delta, const int width = warpSize);
1615
- __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_xor)) __half __shfl_xor(const __half var, const int delta, const int width = warpSize);
1616
- #endif
1617
-
1618
- /**
1619
- * \ingroup CUDA_MATH__HALF_MISC
1620
- * \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
1621
- *
1622
- * \details Returns the value of var held by the thread whose ID is given by delta.
1623
- * If width is less than warpSize then each subsection of the warp behaves as a separate
1624
- * entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
1625
- * the value returned corresponds to the value of var held by the delta modulo width (i.e.
1626
- * within the same subsection). width must have a value which is a power of 2;
1627
- * results are undefined if width is not a power of 2, or is a number greater than
1628
- * warpSize.
1629
- * \param[in] mask - unsigned int. Is only being read.
1630
- * \param[in] var - half2. Is only being read.
1631
- * \param[in] delta - int. Is only being read.
1632
- * \param[in] width - int. Is only being read.
1633
- *
1634
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1635
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1636
- * \internal
1637
- * \exception-guarantee no-throw guarantee
1638
- * \behavior not reentrant, not thread safe
1639
- * \endinternal
1640
- */
1641
- __CUDA_FP16_DECL__ __half2 __shfl_sync(const unsigned mask, const __half2 var, const int delta, const int width = warpSize);
1642
- /**
1643
- * \ingroup CUDA_MATH__HALF_MISC
1644
- * \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
1645
- *
1646
- * \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
1647
- * The value of var held by the resulting lane ID is returned: in effect, var is shifted up
1648
- * the warp by delta threads. If width is less than warpSize then each subsection of the warp
1649
- * behaves as a separate entity with a starting logical thread ID of 0. The source thread index
1650
- * will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
1651
- * width must have a value which is a power of 2; results are undefined if width is not a power of 2,
1652
- * or is a number greater than warpSize.
1653
- * \param[in] mask - unsigned int. Is only being read.
1654
- * \param[in] var - half2. Is only being read.
1655
- * \param[in] delta - unsigned int. Is only being read.
1656
- * \param[in] width - int. Is only being read.
1657
- *
1658
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1659
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1660
- * \note_ref_guide_warp_shuffle
1661
- * \internal
1662
- * \exception-guarantee no-throw guarantee
1663
- * \behavior not reentrant, not thread safe
1664
- * \endinternal
1665
- */
1666
- __CUDA_FP16_DECL__ __half2 __shfl_up_sync(const unsigned mask, const __half2 var, const unsigned int delta, const int width = warpSize);
1667
- /**
1668
- * \ingroup CUDA_MATH__HALF_MISC
1669
- * \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
1670
- *
1671
- * \details Calculates a source thread ID by adding delta to the caller's thread ID.
1672
- * The value of var held by the resulting thread ID is returned: this has the effect
1673
- * of shifting var down the warp by delta threads. If width is less than warpSize then
1674
- * each subsection of the warp behaves as a separate entity with a starting logical
1675
- * thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
1676
- * will not wrap around the value of width and so the upper delta threads
1677
- * will remain unchanged.
1678
- * \param[in] mask - unsigned int. Is only being read.
1679
- * \param[in] var - half2. Is only being read.
1680
- * \param[in] delta - unsigned int. Is only being read.
1681
- * \param[in] width - int. Is only being read.
1682
- *
1683
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1684
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1685
- * \note_ref_guide_warp_shuffle
1686
- * \internal
1687
- * \exception-guarantee no-throw guarantee
1688
- * \behavior not reentrant, not thread safe
1689
- * \endinternal
1690
- */
1691
- __CUDA_FP16_DECL__ __half2 __shfl_down_sync(const unsigned mask, const __half2 var, const unsigned int delta, const int width = warpSize);
1692
- /**
1693
- * \ingroup CUDA_MATH__HALF_MISC
1694
- * \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
1695
- *
1696
- * \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with delta:
1697
- * the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
1698
- * group of width consecutive threads is able to access elements from earlier groups of threads,
1699
- * however if they attempt to access elements from later groups of threads their own value of var
1700
- * will be returned. This mode implements a butterfly addressing pattern such as is used in tree
1701
- * reduction and broadcast.
1702
- * \param[in] mask - unsigned int. Is only being read.
1703
- * \param[in] var - half2. Is only being read.
1704
- * \param[in] delta - int. Is only being read.
1705
- * \param[in] width - int. Is only being read.
1706
- *
1707
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1708
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1709
- * \note_ref_guide_warp_shuffle
1710
- * \internal
1711
- * \exception-guarantee no-throw guarantee
1712
- * \behavior not reentrant, not thread safe
1713
- * \endinternal
1714
- */
1715
- __CUDA_FP16_DECL__ __half2 __shfl_xor_sync(const unsigned mask, const __half2 var, const int delta, const int width = warpSize);
1716
- /**
1717
- * \ingroup CUDA_MATH__HALF_MISC
1718
- * \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
1719
- *
1720
- * \details Returns the value of var held by the thread whose ID is given by delta.
1721
- * If width is less than warpSize then each subsection of the warp behaves as a separate
1722
- * entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
1723
- * the value returned corresponds to the value of var held by the delta modulo width (i.e.
1724
- * within the same subsection). width must have a value which is a power of 2;
1725
- * results are undefined if width is not a power of 2, or is a number greater than
1726
- * warpSize.
1727
- * \param[in] mask - unsigned int. Is only being read.
1728
- * \param[in] var - half. Is only being read.
1729
- * \param[in] delta - int. Is only being read.
1730
- * \param[in] width - int. Is only being read.
1731
- *
1732
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1733
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1734
- * \note_ref_guide_warp_shuffle
1735
- * \internal
1736
- * \exception-guarantee no-throw guarantee
1737
- * \behavior not reentrant, not thread safe
1738
- * \endinternal
1739
- */
1740
- __CUDA_FP16_DECL__ __half __shfl_sync(const unsigned mask, const __half var, const int delta, const int width = warpSize);
1741
- /**
1742
- * \ingroup CUDA_MATH__HALF_MISC
1743
- * \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
1744
- * \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
1745
- * The value of var held by the resulting lane ID is returned: in effect, var is shifted up
1746
- * the warp by delta threads. If width is less than warpSize then each subsection of the warp
1747
- * behaves as a separate entity with a starting logical thread ID of 0. The source thread index
1748
- * will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
1749
- * width must have a value which is a power of 2; results are undefined if width is not a power of 2,
1750
- * or is a number greater than warpSize.
1751
- * \param[in] mask - unsigned int. Is only being read.
1752
- * \param[in] var - half. Is only being read.
1753
- * \param[in] delta - unsigned int. Is only being read.
1754
- * \param[in] width - int. Is only being read.
1755
- *
1756
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1757
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1758
- * \note_ref_guide_warp_shuffle
1759
- * \internal
1760
- * \exception-guarantee no-throw guarantee
1761
- * \behavior not reentrant, not thread safe
1762
- * \endinternal
1763
- */
1764
- __CUDA_FP16_DECL__ __half __shfl_up_sync(const unsigned mask, const __half var, const unsigned int delta, const int width = warpSize);
1765
- /**
1766
- * \ingroup CUDA_MATH__HALF_MISC
1767
- * \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
1768
- *
1769
- * \details Calculates a source thread ID by adding delta to the caller's thread ID.
1770
- * The value of var held by the resulting thread ID is returned: this has the effect
1771
- * of shifting var down the warp by delta threads. If width is less than warpSize then
1772
- * each subsection of the warp behaves as a separate entity with a starting logical
1773
- * thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
1774
- * will not wrap around the value of width and so the upper delta threads
1775
- * will remain unchanged.
1776
- * \param[in] mask - unsigned int. Is only being read.
1777
- * \param[in] var - half. Is only being read.
1778
- * \param[in] delta - unsigned int. Is only being read.
1779
- * \param[in] width - int. Is only being read.
1780
- *
1781
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1782
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1783
- * \note_ref_guide_warp_shuffle
1784
- * \internal
1785
- * \exception-guarantee no-throw guarantee
1786
- * \behavior not reentrant, not thread safe
1787
- * \endinternal
1788
- */
1789
- __CUDA_FP16_DECL__ __half __shfl_down_sync(const unsigned mask, const __half var, const unsigned int delta, const int width = warpSize);
1790
- /**
1791
- * \ingroup CUDA_MATH__HALF_MISC
1792
- * \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
1793
- *
1794
- * \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with delta:
1795
- * the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
1796
- * group of width consecutive threads is able to access elements from earlier groups of threads,
1797
- * however if they attempt to access elements from later groups of threads their own value of var
1798
- * will be returned. This mode implements a butterfly addressing pattern such as is used in tree
1799
- * reduction and broadcast.
1800
- * \param[in] mask - unsigned int. Is only being read.
1801
- * \param[in] var - half. Is only being read.
1802
- * \param[in] delta - int. Is only being read.
1803
- * \param[in] width - int. Is only being read.
1804
- *
1805
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1806
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1807
- * \note_ref_guide_warp_shuffle
1808
- * \internal
1809
- * \exception-guarantee no-throw guarantee
1810
- * \behavior not reentrant, not thread safe
1811
- * \endinternal
1812
- */
1813
- __CUDA_FP16_DECL__ __half __shfl_xor_sync(const unsigned mask, const __half var, const int delta, const int width = warpSize);
1814
-
1815
- #if defined(__local_warpSize)
1816
- #undef warpSize
1817
- #undef __local_warpSize
1818
- #endif
1819
- #endif /*!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 300) */
1820
-
1821
- #if defined(__cplusplus) && ( !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 320) )
1822
- /**
1823
- * \ingroup CUDA_MATH__HALF_MISC
1824
- * \brief Generates a `ld.global.nc` load instruction.
1825
- * \param[in] ptr - memory location
1826
- * \returns The value pointed to by `ptr`
1827
- */
1828
- __CUDA_FP16_DECL__ __half2 __ldg(const __half2 *const ptr);
1829
- /**
1830
- * \ingroup CUDA_MATH__HALF_MISC
1831
- * \brief Generates a `ld.global.nc` load instruction.
1832
- * \param[in] ptr - memory location
1833
- * \returns The value pointed to by `ptr`
1834
- */
1835
- __CUDA_FP16_DECL__ __half __ldg(const __half *const ptr);
1836
- /**
1837
- * \ingroup CUDA_MATH__HALF_MISC
1838
- * \brief Generates a `ld.global.cg` load instruction.
1839
- * \param[in] ptr - memory location
1840
- * \returns The value pointed to by `ptr`
1841
- */
1842
- __CUDA_FP16_DECL__ __half2 __ldcg(const __half2 *const ptr);
1843
- /**
1844
- * \ingroup CUDA_MATH__HALF_MISC
1845
- * \brief Generates a `ld.global.cg` load instruction.
1846
- * \param[in] ptr - memory location
1847
- * \returns The value pointed to by `ptr`
1848
- */
1849
- __CUDA_FP16_DECL__ __half __ldcg(const __half *const ptr);
1850
- /**
1851
- * \ingroup CUDA_MATH__HALF_MISC
1852
- * \brief Generates a `ld.global.ca` load instruction.
1853
- * \param[in] ptr - memory location
1854
- * \returns The value pointed to by `ptr`
1855
- */
1856
- __CUDA_FP16_DECL__ __half2 __ldca(const __half2 *const ptr);
1857
- /**
1858
- * \ingroup CUDA_MATH__HALF_MISC
1859
- * \brief Generates a `ld.global.ca` load instruction.
1860
- * \param[in] ptr - memory location
1861
- * \returns The value pointed to by `ptr`
1862
- */
1863
- __CUDA_FP16_DECL__ __half __ldca(const __half *const ptr);
1864
- /**
1865
- * \ingroup CUDA_MATH__HALF_MISC
1866
- * \brief Generates a `ld.global.cs` load instruction.
1867
- * \param[in] ptr - memory location
1868
- * \returns The value pointed to by `ptr`
1869
- */
1870
- __CUDA_FP16_DECL__ __half2 __ldcs(const __half2 *const ptr);
1871
- /**
1872
- * \ingroup CUDA_MATH__HALF_MISC
1873
- * \brief Generates a `ld.global.cs` load instruction.
1874
- * \param[in] ptr - memory location
1875
- * \returns The value pointed to by `ptr`
1876
- */
1877
- __CUDA_FP16_DECL__ __half __ldcs(const __half *const ptr);
1878
- /**
1879
- * \ingroup CUDA_MATH__HALF_MISC
1880
- * \brief Generates a `ld.global.lu` load instruction.
1881
- * \param[in] ptr - memory location
1882
- * \returns The value pointed to by `ptr`
1883
- */
1884
- __CUDA_FP16_DECL__ __half2 __ldlu(const __half2 *const ptr);
1885
- /**
1886
- * \ingroup CUDA_MATH__HALF_MISC
1887
- * \brief Generates a `ld.global.lu` load instruction.
1888
- * \param[in] ptr - memory location
1889
- * \returns The value pointed to by `ptr`
1890
- */
1891
- __CUDA_FP16_DECL__ __half __ldlu(const __half *const ptr);
1892
- /**
1893
- * \ingroup CUDA_MATH__HALF_MISC
1894
- * \brief Generates a `ld.global.cv` load instruction.
1895
- * \param[in] ptr - memory location
1896
- * \returns The value pointed to by `ptr`
1897
- */
1898
- __CUDA_FP16_DECL__ __half2 __ldcv(const __half2 *const ptr);
1899
- /**
1900
- * \ingroup CUDA_MATH__HALF_MISC
1901
- * \brief Generates a `ld.global.cv` load instruction.
1902
- * \param[in] ptr - memory location
1903
- * \returns The value pointed to by `ptr`
1904
- */
1905
- __CUDA_FP16_DECL__ __half __ldcv(const __half *const ptr);
1906
- /**
1907
- * \ingroup CUDA_MATH__HALF_MISC
1908
- * \brief Generates a `st.global.wb` store instruction.
1909
- * \param[out] ptr - memory location
1910
- * \param[in] value - the value to be stored
1911
- */
1912
- __CUDA_FP16_DECL__ void __stwb(__half2 *const ptr, const __half2 value);
1913
- /**
1914
- * \ingroup CUDA_MATH__HALF_MISC
1915
- * \brief Generates a `st.global.wb` store instruction.
1916
- * \param[out] ptr - memory location
1917
- * \param[in] value - the value to be stored
1918
- */
1919
- __CUDA_FP16_DECL__ void __stwb(__half *const ptr, const __half value);
1920
- /**
1921
- * \ingroup CUDA_MATH__HALF_MISC
1922
- * \brief Generates a `st.global.cg` store instruction.
1923
- * \param[out] ptr - memory location
1924
- * \param[in] value - the value to be stored
1925
- */
1926
- __CUDA_FP16_DECL__ void __stcg(__half2 *const ptr, const __half2 value);
1927
- /**
1928
- * \ingroup CUDA_MATH__HALF_MISC
1929
- * \brief Generates a `st.global.cg` store instruction.
1930
- * \param[out] ptr - memory location
1931
- * \param[in] value - the value to be stored
1932
- */
1933
- __CUDA_FP16_DECL__ void __stcg(__half *const ptr, const __half value);
1934
- /**
1935
- * \ingroup CUDA_MATH__HALF_MISC
1936
- * \brief Generates a `st.global.cs` store instruction.
1937
- * \param[out] ptr - memory location
1938
- * \param[in] value - the value to be stored
1939
- */
1940
- __CUDA_FP16_DECL__ void __stcs(__half2 *const ptr, const __half2 value);
1941
- /**
1942
- * \ingroup CUDA_MATH__HALF_MISC
1943
- * \brief Generates a `st.global.cs` store instruction.
1944
- * \param[out] ptr - memory location
1945
- * \param[in] value - the value to be stored
1946
- */
1947
- __CUDA_FP16_DECL__ void __stcs(__half *const ptr, const __half value);
1948
- /**
1949
- * \ingroup CUDA_MATH__HALF_MISC
1950
- * \brief Generates a `st.global.wt` store instruction.
1951
- * \param[out] ptr - memory location
1952
- * \param[in] value - the value to be stored
1953
- */
1954
- __CUDA_FP16_DECL__ void __stwt(__half2 *const ptr, const __half2 value);
1955
- /**
1956
- * \ingroup CUDA_MATH__HALF_MISC
1957
- * \brief Generates a `st.global.wt` store instruction.
1958
- * \param[out] ptr - memory location
1959
- * \param[in] value - the value to be stored
1960
- */
1961
- __CUDA_FP16_DECL__ void __stwt(__half *const ptr, const __half value);
1962
- #endif /*defined(__cplusplus) && ( !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 320) )*/
1963
-
1964
- #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
1965
- /**
1966
- * \ingroup CUDA_MATH__HALF2_COMPARISON
1967
- * \brief Performs \p half2 vector if-equal comparison.
1968
- *
1969
- * \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
1970
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1971
- * NaN inputs generate false results.
1972
- * \param[in] a - half2. Is only being read.
1973
- * \param[in] b - half2. Is only being read.
1974
- *
1975
- * \returns half2
1976
- * - The vector result of if-equal comparison of vectors \p a and \p b.
1977
- * \internal
1978
- * \exception-guarantee no-throw guarantee
1979
- * \behavior reentrant, thread safe
1980
- * \endinternal
1981
- */
1982
- __CUDA_FP16_DECL__ __half2 __heq2(const __half2 a, const __half2 b);
1983
- /**
1984
- * \ingroup CUDA_MATH__HALF2_COMPARISON
1985
- * \brief Performs \p half2 vector not-equal comparison.
1986
- *
1987
- * \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
1988
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1989
- * NaN inputs generate false results.
1990
- * \param[in] a - half2. Is only being read.
1991
- * \param[in] b - half2. Is only being read.
1992
- *
1993
- * \returns half2
1994
- * - The vector result of not-equal comparison of vectors \p a and \p b.
1995
- * \internal
1996
- * \exception-guarantee no-throw guarantee
1997
- * \behavior reentrant, thread safe
1998
- * \endinternal
1999
- */
2000
- __CUDA_FP16_DECL__ __half2 __hne2(const __half2 a, const __half2 b);
2001
- /**
2002
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2003
- * \brief Performs \p half2 vector less-equal comparison.
2004
- *
2005
- * \details Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
2006
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2007
- * NaN inputs generate false results.
2008
- * \param[in] a - half2. Is only being read.
2009
- * \param[in] b - half2. Is only being read.
2010
- *
2011
- * \returns half2
2012
- * - The \p half2 result of less-equal comparison of vectors \p a and \p b.
2013
- * \internal
2014
- * \exception-guarantee no-throw guarantee
2015
- * \behavior reentrant, thread safe
2016
- * \endinternal
2017
- */
2018
- __CUDA_FP16_DECL__ __half2 __hle2(const __half2 a, const __half2 b);
2019
- /**
2020
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2021
- * \brief Performs \p half2 vector greater-equal comparison.
2022
- *
2023
- * \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
2024
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2025
- * NaN inputs generate false results.
2026
- * \param[in] a - half2. Is only being read.
2027
- * \param[in] b - half2. Is only being read.
2028
- *
2029
- * \returns half2
2030
- * - The vector result of greater-equal comparison of vectors \p a and \p b.
2031
- * \internal
2032
- * \exception-guarantee no-throw guarantee
2033
- * \behavior reentrant, thread safe
2034
- * \endinternal
2035
- */
2036
- __CUDA_FP16_DECL__ __half2 __hge2(const __half2 a, const __half2 b);
2037
- /**
2038
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2039
- * \brief Performs \p half2 vector less-than comparison.
2040
- *
2041
- * \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
2042
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2043
- * NaN inputs generate false results.
2044
- * \param[in] a - half2. Is only being read.
2045
- * \param[in] b - half2. Is only being read.
2046
- *
2047
- * \returns half2
2048
- * - The half2 vector result of less-than comparison of vectors \p a and \p b.
2049
- * \internal
2050
- * \exception-guarantee no-throw guarantee
2051
- * \behavior reentrant, thread safe
2052
- * \endinternal
2053
- */
2054
- __CUDA_FP16_DECL__ __half2 __hlt2(const __half2 a, const __half2 b);
2055
- /**
2056
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2057
- * \brief Performs \p half2 vector greater-than comparison.
2058
- *
2059
- * \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
2060
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2061
- * NaN inputs generate false results.
2062
- * \param[in] a - half2. Is only being read.
2063
- * \param[in] b - half2. Is only being read.
2064
- *
2065
- * \returns half2
2066
- * - The vector result of greater-than comparison of vectors \p a and \p b.
2067
- * \internal
2068
- * \exception-guarantee no-throw guarantee
2069
- * \behavior reentrant, thread safe
2070
- * \endinternal
2071
- */
2072
- __CUDA_FP16_DECL__ __half2 __hgt2(const __half2 a, const __half2 b);
2073
- /**
2074
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2075
- * \brief Performs \p half2 vector unordered if-equal comparison.
2076
- *
2077
- * \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
2078
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2079
- * NaN inputs generate true results.
2080
- * \param[in] a - half2. Is only being read.
2081
- * \param[in] b - half2. Is only being read.
2082
- *
2083
- * \returns half2
2084
- * - The vector result of unordered if-equal comparison of vectors \p a and \p b.
2085
- * \internal
2086
- * \exception-guarantee no-throw guarantee
2087
- * \behavior reentrant, thread safe
2088
- * \endinternal
2089
- */
2090
- __CUDA_FP16_DECL__ __half2 __hequ2(const __half2 a, const __half2 b);
2091
- /**
2092
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2093
- * \brief Performs \p half2 vector unordered not-equal comparison.
2094
- *
2095
- * \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
2096
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2097
- * NaN inputs generate true results.
2098
- * \param[in] a - half2. Is only being read.
2099
- * \param[in] b - half2. Is only being read.
2100
- *
2101
- * \returns half2
2102
- * - The vector result of unordered not-equal comparison of vectors \p a and \p b.
2103
- * \internal
2104
- * \exception-guarantee no-throw guarantee
2105
- * \behavior reentrant, thread safe
2106
- * \endinternal
2107
- */
2108
- __CUDA_FP16_DECL__ __half2 __hneu2(const __half2 a, const __half2 b);
2109
- /**
2110
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2111
- * \brief Performs \p half2 vector unordered less-equal comparison.
2112
- *
2113
- * \details Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
2114
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2115
- * NaN inputs generate true results.
2116
- * \param[in] a - half2. Is only being read.
2117
- * \param[in] b - half2. Is only being read.
2118
- *
2119
- * \returns half2
2120
- * - The vector result of unordered less-equal comparison of vectors \p a and \p b.
2121
- * \internal
2122
- * \exception-guarantee no-throw guarantee
2123
- * \behavior reentrant, thread safe
2124
- * \endinternal
2125
- */
2126
- __CUDA_FP16_DECL__ __half2 __hleu2(const __half2 a, const __half2 b);
2127
- /**
2128
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2129
- * \brief Performs \p half2 vector unordered greater-equal comparison.
2130
- *
2131
- * \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
2132
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2133
- * NaN inputs generate true results.
2134
- * \param[in] a - half2. Is only being read.
2135
- * \param[in] b - half2. Is only being read.
2136
- *
2137
- * \returns half2
2138
- * - The \p half2 vector result of unordered greater-equal comparison of vectors \p a and \p b.
2139
- * \internal
2140
- * \exception-guarantee no-throw guarantee
2141
- * \behavior reentrant, thread safe
2142
- * \endinternal
2143
- */
2144
- __CUDA_FP16_DECL__ __half2 __hgeu2(const __half2 a, const __half2 b);
2145
- /**
2146
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2147
- * \brief Performs \p half2 vector unordered less-than comparison.
2148
- *
2149
- * \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
2150
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2151
- * NaN inputs generate true results.
2152
- * \param[in] a - half2. Is only being read.
2153
- * \param[in] b - half2. Is only being read.
2154
- *
2155
- * \returns half2
2156
- * - The vector result of unordered less-than comparison of vectors \p a and \p b.
2157
- * \internal
2158
- * \exception-guarantee no-throw guarantee
2159
- * \behavior reentrant, thread safe
2160
- * \endinternal
2161
- */
2162
- __CUDA_FP16_DECL__ __half2 __hltu2(const __half2 a, const __half2 b);
2163
- /**
2164
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2165
- * \brief Performs \p half2 vector unordered greater-than comparison.
2166
- *
2167
- * \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
2168
- * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2169
- * NaN inputs generate true results.
2170
- * \param[in] a - half2. Is only being read.
2171
- * \param[in] b - half2. Is only being read.
2172
- *
2173
- * \returns half2
2174
- * - The \p half2 vector result of unordered greater-than comparison of vectors \p a and \p b.
2175
- * \internal
2176
- * \exception-guarantee no-throw guarantee
2177
- * \behavior reentrant, thread safe
2178
- * \endinternal
2179
- */
2180
- __CUDA_FP16_DECL__ __half2 __hgtu2(const __half2 a, const __half2 b);
2181
- /**
2182
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2183
- * \brief Determine whether \p half2 argument is a NaN.
2184
- *
2185
- * \details Determine whether each half of input \p half2 number \p a is a NaN.
2186
- * \param[in] a - half2. Is only being read.
2187
- *
2188
- * \returns half2
2189
- * - The half2 with the corresponding \p half results set to
2190
- * 1.0 for NaN, 0.0 otherwise.
2191
- * \internal
2192
- * \exception-guarantee no-throw guarantee
2193
- * \behavior reentrant, thread safe
2194
- * \endinternal
2195
- */
2196
- __CUDA_FP16_DECL__ __half2 __hisnan2(const __half2 a);
2197
- /**
2198
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2199
- * \brief Performs \p half2 vector addition in round-to-nearest-even mode.
2200
- *
2201
- * \details Performs \p half2 vector add of inputs \p a and \p b, in round-to-nearest-even
2202
- * mode.
2203
- * \internal
2204
- * \req DEEPLEARN-SRM_REQ-95
2205
- * \endinternal
2206
- * \param[in] a - half2. Is only being read.
2207
- * \param[in] b - half2. Is only being read.
2208
- *
2209
- * \returns half2
2210
- * - The sum of vectors \p a and \p b.
2211
- * \internal
2212
- * \exception-guarantee no-throw guarantee
2213
- * \behavior reentrant, thread safe
2214
- * \endinternal
2215
- */
2216
- __CUDA_FP16_DECL__ __half2 __hadd2(const __half2 a, const __half2 b);
2217
- /**
2218
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2219
- * \brief Performs \p half2 vector subtraction in round-to-nearest-even mode.
2220
- *
2221
- * \details Subtracts \p half2 input vector \p b from input vector \p a in
2222
- * round-to-nearest-even mode.
2223
- * \internal
2224
- * \req DEEPLEARN-SRM_REQ-104
2225
- * \endinternal
2226
- * \param[in] a - half2. Is only being read.
2227
- * \param[in] b - half2. Is only being read.
2228
- *
2229
- * \returns half2
2230
- * - The subtraction of vector \p b from \p a.
2231
- * \internal
2232
- * \exception-guarantee no-throw guarantee
2233
- * \behavior reentrant, thread safe
2234
- * \endinternal
2235
- */
2236
- __CUDA_FP16_DECL__ __half2 __hsub2(const __half2 a, const __half2 b);
2237
- /**
2238
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2239
- * \brief Performs \p half2 vector multiplication in round-to-nearest-even mode.
2240
- *
2241
- * \details Performs \p half2 vector multiplication of inputs \p a and \p b, in
2242
- * round-to-nearest-even mode.
2243
- * \internal
2244
- * \req DEEPLEARN-SRM_REQ-102
2245
- * \endinternal
2246
- * \param[in] a - half2. Is only being read.
2247
- * \param[in] b - half2. Is only being read.
2248
- *
2249
- * \returns half2
2250
- * - The result of elementwise multiplying the vectors \p a and \p b.
2251
- * \internal
2252
- * \exception-guarantee no-throw guarantee
2253
- * \behavior reentrant, thread safe
2254
- * \endinternal
2255
- */
2256
- __CUDA_FP16_DECL__ __half2 __hmul2(const __half2 a, const __half2 b);
2257
- /**
2258
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2259
- * \brief Performs \p half2 vector addition in round-to-nearest-even mode.
2260
- *
2261
- * \details Performs \p half2 vector add of inputs \p a and \p b, in round-to-nearest-even
2262
- * mode. Prevents floating-point contractions of mul+add into fma.
2263
- * \internal
2264
- * \req DEEPLEARN-SRM_REQ-95
2265
- * \endinternal
2266
- * \param[in] a - half2. Is only being read.
2267
- * \param[in] b - half2. Is only being read.
2268
- *
2269
- * \returns half2
2270
- * - The sum of vectors \p a and \p b.
2271
- * \internal
2272
- * \exception-guarantee no-throw guarantee
2273
- * \behavior reentrant, thread safe
2274
- * \endinternal
2275
- */
2276
- __CUDA_FP16_DECL__ __half2 __hadd2_rn(const __half2 a, const __half2 b);
2277
- /**
2278
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2279
- * \brief Performs \p half2 vector subtraction in round-to-nearest-even mode.
2280
- *
2281
- * \details Subtracts \p half2 input vector \p b from input vector \p a in
2282
- * round-to-nearest-even mode. Prevents floating-point contractions of mul+sub
2283
- * into fma.
2284
- * \internal
2285
- * \req DEEPLEARN-SRM_REQ-104
2286
- * \endinternal
2287
- * \param[in] a - half2. Is only being read.
2288
- * \param[in] b - half2. Is only being read.
2289
- *
2290
- * \returns half2
2291
- * - The subtraction of vector \p b from \p a.
2292
- * \internal
2293
- * \exception-guarantee no-throw guarantee
2294
- * \behavior reentrant, thread safe
2295
- * \endinternal
2296
- */
2297
- __CUDA_FP16_DECL__ __half2 __hsub2_rn(const __half2 a, const __half2 b);
2298
- /**
2299
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2300
- * \brief Performs \p half2 vector multiplication in round-to-nearest-even mode.
2301
- *
2302
- * \details Performs \p half2 vector multiplication of inputs \p a and \p b, in
2303
- * round-to-nearest-even mode. Prevents floating-point contractions of
2304
- * mul+add or sub into fma.
2305
- * \internal
2306
- * \req DEEPLEARN-SRM_REQ-102
2307
- * \endinternal
2308
- * \param[in] a - half2. Is only being read.
2309
- * \param[in] b - half2. Is only being read.
2310
- *
2311
- * \returns half2
2312
- * - The result of elementwise multiplying the vectors \p a and \p b.
2313
- * \internal
2314
- * \exception-guarantee no-throw guarantee
2315
- * \behavior reentrant, thread safe
2316
- * \endinternal
2317
- */
2318
- __CUDA_FP16_DECL__ __half2 __hmul2_rn(const __half2 a, const __half2 b);
2319
- /**
2320
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2321
- * \brief Performs \p half2 vector division in round-to-nearest-even mode.
2322
- *
2323
- * \details Divides \p half2 input vector \p a by input vector \p b in round-to-nearest-even
2324
- * mode.
2325
- * \internal
2326
- * \req DEEPLEARN-SRM_REQ-103
2327
- * \endinternal
2328
- * \param[in] a - half2. Is only being read.
2329
- * \param[in] b - half2. Is only being read.
2330
- *
2331
- * \returns half2
2332
- * - The elementwise division of \p a with \p b.
2333
- * \internal
2334
- * \exception-guarantee no-throw guarantee
2335
- * \behavior reentrant, thread safe
2336
- * \endinternal
2337
- */
2338
- __CUDA_FP16_DECL__ __half2 __h2div(const __half2 a, const __half2 b);
2339
- /**
2340
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2341
- * \brief Calculates the absolute value of both halves of the input \p half2 number and
2342
- * returns the result.
2343
- *
2344
- * \details Calculates the absolute value of both halves of the input \p half2 number and
2345
- * returns the result.
2346
- * \param[in] a - half2. Is only being read.
2347
- *
2348
- * \returns half2
2349
- * - Returns \p a with the absolute value of both halves.
2350
- * \internal
2351
- * \exception-guarantee no-throw guarantee
2352
- * \behavior reentrant, thread safe
2353
- * \endinternal
2354
- */
2355
- __CUDA_FP16_DECL__ __half2 __habs2(const __half2 a);
2356
- /**
2357
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2358
- * \brief Performs \p half2 vector addition in round-to-nearest-even mode, with
2359
- * saturation to [0.0, 1.0].
2360
- *
2361
- * \details Performs \p half2 vector add of inputs \p a and \p b, in round-to-nearest-even
2362
- * mode, and clamps the results to range [0.0, 1.0]. NaN results are flushed to
2363
- * +0.0.
2364
- * \param[in] a - half2. Is only being read.
2365
- * \param[in] b - half2. Is only being read.
2366
- *
2367
- * \returns half2
2368
- * - The sum of \p a and \p b, with respect to saturation.
2369
- * \internal
2370
- * \exception-guarantee no-throw guarantee
2371
- * \behavior reentrant, thread safe
2372
- * \endinternal
2373
- */
2374
- __CUDA_FP16_DECL__ __half2 __hadd2_sat(const __half2 a, const __half2 b);
2375
- /**
2376
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2377
- * \brief Performs \p half2 vector subtraction in round-to-nearest-even mode,
2378
- * with saturation to [0.0, 1.0].
2379
- *
2380
- * \details Subtracts \p half2 input vector \p b from input vector \p a in
2381
- * round-to-nearest-even mode, and clamps the results to range [0.0, 1.0]. NaN
2382
- * results are flushed to +0.0.
2383
- * \param[in] a - half2. Is only being read.
2384
- * \param[in] b - half2. Is only being read.
2385
- *
2386
- * \returns half2
2387
- * - The subtraction of vector \p b from \p a, with respect to saturation.
2388
- * \internal
2389
- * \exception-guarantee no-throw guarantee
2390
- * \behavior reentrant, thread safe
2391
- * \endinternal
2392
- */
2393
- __CUDA_FP16_DECL__ __half2 __hsub2_sat(const __half2 a, const __half2 b);
2394
- /**
2395
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2396
- * \brief Performs \p half2 vector multiplication in round-to-nearest-even mode,
2397
- * with saturation to [0.0, 1.0].
2398
- *
2399
- * \details Performs \p half2 vector multiplication of inputs \p a and \p b, in
2400
- * round-to-nearest-even mode, and clamps the results to range [0.0, 1.0]. NaN
2401
- * results are flushed to +0.0.
2402
- * \param[in] a - half2. Is only being read.
2403
- * \param[in] b - half2. Is only being read.
2404
- *
2405
- * \returns half2
2406
- * - The result of elementwise multiplication of vectors \p a and \p b,
2407
- * with respect to saturation.
2408
- * \internal
2409
- * \exception-guarantee no-throw guarantee
2410
- * \behavior reentrant, thread safe
2411
- * \endinternal
2412
- */
2413
- __CUDA_FP16_DECL__ __half2 __hmul2_sat(const __half2 a, const __half2 b);
2414
- /**
2415
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2416
- * \brief Performs \p half2 vector fused multiply-add in round-to-nearest-even
2417
- * mode.
2418
- *
2419
- * \details Performs \p half2 vector multiply on inputs \p a and \p b,
2420
- * then performs a \p half2 vector add of the result with \p c,
2421
- * rounding the result once in round-to-nearest-even mode.
2422
- * \internal
2423
- * \req DEEPLEARN-SRM_REQ-105
2424
- * \endinternal
2425
- * \param[in] a - half2. Is only being read.
2426
- * \param[in] b - half2. Is only being read.
2427
- * \param[in] c - half2. Is only being read.
2428
- *
2429
- * \returns half2
2430
- * - The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c.
2431
- * \internal
2432
- * \exception-guarantee no-throw guarantee
2433
- * \behavior reentrant, thread safe
2434
- * \endinternal
2435
- */
2436
- __CUDA_FP16_DECL__ __half2 __hfma2(const __half2 a, const __half2 b, const __half2 c);
2437
- /**
2438
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2439
- * \brief Performs \p half2 vector fused multiply-add in round-to-nearest-even
2440
- * mode, with saturation to [0.0, 1.0].
2441
- *
2442
- * \details Performs \p half2 vector multiply on inputs \p a and \p b,
2443
- * then performs a \p half2 vector add of the result with \p c,
2444
- * rounding the result once in round-to-nearest-even mode, and clamps the
2445
- * results to range [0.0, 1.0]. NaN results are flushed to +0.0.
2446
- * \param[in] a - half2. Is only being read.
2447
- * \param[in] b - half2. Is only being read.
2448
- * \param[in] c - half2. Is only being read.
2449
- *
2450
- * \returns half2
2451
- * - The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c,
2452
- * with respect to saturation.
2453
- * \internal
2454
- * \exception-guarantee no-throw guarantee
2455
- * \behavior reentrant, thread safe
2456
- * \endinternal
2457
- */
2458
- __CUDA_FP16_DECL__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const __half2 c);
2459
- /**
2460
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
2461
- * \brief Negates both halves of the input \p half2 number and returns the
2462
- * result.
2463
- *
2464
- * \details Negates both halves of the input \p half2 number \p a and returns the result.
2465
- * \internal
2466
- * \req DEEPLEARN-SRM_REQ-101
2467
- * \endinternal
2468
- * \param[in] a - half2. Is only being read.
2469
- *
2470
- * \returns half2
2471
- * - Returns \p a with both halves negated.
2472
- * \internal
2473
- * \exception-guarantee no-throw guarantee
2474
- * \behavior reentrant, thread safe
2475
- * \endinternal
2476
- */
2477
- __CUDA_FP16_DECL__ __half2 __hneg2(const __half2 a);
2478
- /**
2479
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2480
- * \brief Calculates the absolute value of input \p half number and returns the result.
2481
- *
2482
- * \details Calculates the absolute value of input \p half number and returns the result.
2483
- * \param[in] a - half. Is only being read.
2484
- *
2485
- * \returns half
2486
- * - The absolute value of \p a.
2487
- * \internal
2488
- * \exception-guarantee no-throw guarantee
2489
- * \behavior reentrant, thread safe
2490
- * \endinternal
2491
- */
2492
- __CUDA_FP16_DECL__ __half __habs(const __half a);
2493
- /**
2494
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2495
- * \brief Performs \p half addition in round-to-nearest-even mode.
2496
- *
2497
- * \details Performs \p half addition of inputs \p a and \p b, in round-to-nearest-even
2498
- * mode.
2499
- * \internal
2500
- * \req DEEPLEARN-SRM_REQ-94
2501
- * \endinternal
2502
- * \param[in] a - half. Is only being read.
2503
- * \param[in] b - half. Is only being read.
2504
- *
2505
- * \returns half
2506
- * - The sum of \p a and \p b.
2507
- * \internal
2508
- * \exception-guarantee no-throw guarantee
2509
- * \behavior reentrant, thread safe
2510
- * \endinternal
2511
- */
2512
- __CUDA_FP16_DECL__ __half __hadd(const __half a, const __half b);
2513
- /**
2514
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2515
- * \brief Performs \p half subtraction in round-to-nearest-even mode.
2516
- *
2517
- * \details Subtracts \p half input \p b from input \p a in round-to-nearest-even
2518
- * mode.
2519
- * \internal
2520
- * \req DEEPLEARN-SRM_REQ-97
2521
- * \endinternal
2522
- * \param[in] a - half. Is only being read.
2523
- * \param[in] b - half. Is only being read.
2524
- *
2525
- * \returns half
2526
- * - The result of subtracting \p b from \p a.
2527
- * \internal
2528
- * \exception-guarantee no-throw guarantee
2529
- * \behavior reentrant, thread safe
2530
- * \endinternal
2531
- */
2532
- __CUDA_FP16_DECL__ __half __hsub(const __half a, const __half b);
2533
- /**
2534
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2535
- * \brief Performs \p half multiplication in round-to-nearest-even mode.
2536
- *
2537
- * \details Performs \p half multiplication of inputs \p a and \p b, in round-to-nearest-even
2538
- * mode.
2539
- * \internal
2540
- * \req DEEPLEARN-SRM_REQ-99
2541
- * \endinternal
2542
- * \param[in] a - half. Is only being read.
2543
- * \param[in] b - half. Is only being read.
2544
- *
2545
- * \returns half
2546
- * - The result of multiplying \p a and \p b.
2547
- */
2548
- __CUDA_FP16_DECL__ __half __hmul(const __half a, const __half b);
2549
- /**
2550
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2551
- * \brief Performs \p half addition in round-to-nearest-even mode.
2552
- *
2553
- * \details Performs \p half addition of inputs \p a and \p b, in round-to-nearest-even
2554
- * mode. Prevents floating-point contractions of mul+add into fma.
2555
- * \internal
2556
- * \req DEEPLEARN-SRM_REQ-94
2557
- * \endinternal
2558
- * \param[in] a - half. Is only being read.
2559
- * \param[in] b - half. Is only being read.
2560
- *
2561
- * \returns half
2562
- * - The sum of \p a and \p b.
2563
- * \internal
2564
- * \exception-guarantee no-throw guarantee
2565
- * \behavior reentrant, thread safe
2566
- * \endinternal
2567
- */
2568
- __CUDA_FP16_DECL__ __half __hadd_rn(const __half a, const __half b);
2569
- /**
2570
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2571
- * \brief Performs \p half subtraction in round-to-nearest-even mode.
2572
- *
2573
- * \details Subtracts \p half input \p b from input \p a in round-to-nearest-even
2574
- * mode. Prevents floating-point contractions of mul+sub into fma.
2575
- * \internal
2576
- * \req DEEPLEARN-SRM_REQ-97
2577
- * \endinternal
2578
- * \param[in] a - half. Is only being read.
2579
- * \param[in] b - half. Is only being read.
2580
- *
2581
- * \returns half
2582
- * - The result of subtracting \p b from \p a.
2583
- * \internal
2584
- * \exception-guarantee no-throw guarantee
2585
- * \behavior reentrant, thread safe
2586
- * \endinternal
2587
- */
2588
- __CUDA_FP16_DECL__ __half __hsub_rn(const __half a, const __half b);
2589
- /**
2590
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2591
- * \brief Performs \p half multiplication in round-to-nearest-even mode.
2592
- *
2593
- * \details Performs \p half multiplication of inputs \p a and \p b, in round-to-nearest-even
2594
- * mode. Prevents floating-point contractions of mul+add or sub into fma.
2595
- * \internal
2596
- * \req DEEPLEARN-SRM_REQ-99
2597
- * \endinternal
2598
- * \param[in] a - half. Is only being read.
2599
- * \param[in] b - half. Is only being read.
2600
- *
2601
- * \returns half
2602
- * - The result of multiplying \p a and \p b.
2603
- */
2604
- __CUDA_FP16_DECL__ __half __hmul_rn(const __half a, const __half b);
2605
- /**
2606
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2607
- * \brief Performs \p half division in round-to-nearest-even mode.
2608
- *
2609
- * \details Divides \p half input \p a by input \p b in round-to-nearest-even
2610
- * mode.
2611
- * \internal
2612
- * \req DEEPLEARN-SRM_REQ-98
2613
- * \endinternal
2614
- * \param[in] a - half. Is only being read.
2615
- * \param[in] b - half. Is only being read.
2616
- *
2617
- * \returns half
2618
- * - The result of dividing \p a by \p b.
2619
- * \internal
2620
- * \exception-guarantee no-throw guarantee
2621
- * \behavior reentrant, thread safe
2622
- * \endinternal
2623
- */
2624
- __CUDA_FP16_DECL__ __half __hdiv(const __half a, const __half b);
2625
- /**
2626
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2627
- * \brief Performs \p half addition in round-to-nearest-even mode, with
2628
- * saturation to [0.0, 1.0].
2629
- *
2630
- * \details Performs \p half add of inputs \p a and \p b, in round-to-nearest-even mode,
2631
- * and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.
2632
- * \param[in] a - half. Is only being read.
2633
- * \param[in] b - half. Is only being read.
2634
- *
2635
- * \returns half
2636
- * - The sum of \p a and \p b, with respect to saturation.
2637
- * \internal
2638
- * \exception-guarantee no-throw guarantee
2639
- * \behavior reentrant, thread safe
2640
- * \endinternal
2641
- */
2642
- __CUDA_FP16_DECL__ __half __hadd_sat(const __half a, const __half b);
2643
- /**
2644
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2645
- * \brief Performs \p half subtraction in round-to-nearest-even mode, with
2646
- * saturation to [0.0, 1.0].
2647
- *
2648
- * \details Subtracts \p half input \p b from input \p a in round-to-nearest-even
2649
- * mode,
2650
- * and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.
2651
- * \param[in] a - half. Is only being read.
2652
- * \param[in] b - half. Is only being read.
2653
- *
2654
- * \returns half
2655
- * - The result of subtraction of \p b from \p a, with respect to saturation.
2656
- * \internal
2657
- * \exception-guarantee no-throw guarantee
2658
- * \behavior reentrant, thread safe
2659
- * \endinternal
2660
- */
2661
- __CUDA_FP16_DECL__ __half __hsub_sat(const __half a, const __half b);
2662
- /**
2663
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2664
- * \brief Performs \p half multiplication in round-to-nearest-even mode, with
2665
- * saturation to [0.0, 1.0].
2666
- *
2667
- * \details Performs \p half multiplication of inputs \p a and \p b, in round-to-nearest-even
2668
- * mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to
2669
- * +0.0.
2670
- * \param[in] a - half. Is only being read.
2671
- * \param[in] b - half. Is only being read.
2672
- *
2673
- * \returns half
2674
- * - The result of multiplying \p a and \p b, with respect to saturation.
2675
- * \internal
2676
- * \exception-guarantee no-throw guarantee
2677
- * \behavior reentrant, thread safe
2678
- * \endinternal
2679
- */
2680
- __CUDA_FP16_DECL__ __half __hmul_sat(const __half a, const __half b);
2681
- /**
2682
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2683
- * \brief Performs \p half fused multiply-add in round-to-nearest-even mode.
2684
- *
2685
- * \details Performs \p half multiply on inputs \p a and \p b,
2686
- * then performs a \p half add of the result with \p c,
2687
- * rounding the result once in round-to-nearest-even mode.
2688
- * \internal
2689
- * \req DEEPLEARN-SRM_REQ-96
2690
- * \endinternal
2691
- * \param[in] a - half. Is only being read.
2692
- * \param[in] b - half. Is only being read.
2693
- * \param[in] c - half. Is only being read.
2694
- *
2695
- * \returns half
2696
- * - The result of fused multiply-add operation on \p
2697
- * a, \p b, and \p c.
2698
- * \internal
2699
- * \exception-guarantee no-throw guarantee
2700
- * \behavior reentrant, thread safe
2701
- * \endinternal
2702
- */
2703
- __CUDA_FP16_DECL__ __half __hfma(const __half a, const __half b, const __half c);
2704
- /**
2705
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2706
- * \brief Performs \p half fused multiply-add in round-to-nearest-even mode,
2707
- * with saturation to [0.0, 1.0].
2708
- *
2709
- * \details Performs \p half multiply on inputs \p a and \p b,
2710
- * then performs a \p half add of the result with \p c,
2711
- * rounding the result once in round-to-nearest-even mode, and clamps the result
2712
- * to range [0.0, 1.0]. NaN results are flushed to +0.0.
2713
- * \param[in] a - half. Is only being read.
2714
- * \param[in] b - half. Is only being read.
2715
- * \param[in] c - half. Is only being read.
2716
- *
2717
- * \returns half
2718
- * - The result of fused multiply-add operation on \p
2719
- * a, \p b, and \p c, with respect to saturation.
2720
- * \internal
2721
- * \exception-guarantee no-throw guarantee
2722
- * \behavior reentrant, thread safe
2723
- * \endinternal
2724
- */
2725
- __CUDA_FP16_DECL__ __half __hfma_sat(const __half a, const __half b, const __half c);
2726
- /**
2727
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
2728
- * \brief Negates input \p half number and returns the result.
2729
- *
2730
- * \details Negates input \p half number and returns the result.
2731
- * \internal
2732
- * \req DEEPLEARN-SRM_REQ-100
2733
- * \endinternal
2734
- * \param[in] a - half. Is only being read.
2735
- *
2736
- * \returns half
2737
- * - The negation of \p a.
2738
- * \internal
2739
- * \exception-guarantee no-throw guarantee
2740
- * \behavior reentrant, thread safe
2741
- * \endinternal
2742
- */
2743
- __CUDA_FP16_DECL__ __half __hneg(const __half a);
2744
- /**
2745
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2746
- * \brief Performs \p half2 vector if-equal comparison and returns boolean true
2747
- * iff both \p half results are true, boolean false otherwise.
2748
- *
2749
- * \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
2750
- * The bool result is set to true only if both \p half if-equal comparisons
2751
- * evaluate to true, or false otherwise.
2752
- * NaN inputs generate false results.
2753
- * \param[in] a - half2. Is only being read.
2754
- * \param[in] b - half2. Is only being read.
2755
- *
2756
- * \returns bool
2757
- * - true if both \p half results of if-equal comparison
2758
- * of vectors \p a and \p b are true;
2759
- * - false otherwise.
2760
- * \internal
2761
- * \exception-guarantee no-throw guarantee
2762
- * \behavior reentrant, thread safe
2763
- * \endinternal
2764
- */
2765
- __CUDA_FP16_DECL__ bool __hbeq2(const __half2 a, const __half2 b);
2766
- /**
2767
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2768
- * \brief Performs \p half2 vector not-equal comparison and returns boolean
2769
- * true iff both \p half results are true, boolean false otherwise.
2770
- *
2771
- * \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
2772
- * The bool result is set to true only if both \p half not-equal comparisons
2773
- * evaluate to true, or false otherwise.
2774
- * NaN inputs generate false results.
2775
- * \param[in] a - half2. Is only being read.
2776
- * \param[in] b - half2. Is only being read.
2777
- *
2778
- * \returns bool
2779
- * - true if both \p half results of not-equal comparison
2780
- * of vectors \p a and \p b are true,
2781
- * - false otherwise.
2782
- * \internal
2783
- * \exception-guarantee no-throw guarantee
2784
- * \behavior reentrant, thread safe
2785
- * \endinternal
2786
- */
2787
- __CUDA_FP16_DECL__ bool __hbne2(const __half2 a, const __half2 b);
2788
- /**
2789
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2790
- * \brief Performs \p half2 vector less-equal comparison and returns boolean
2791
- * true iff both \p half results are true, boolean false otherwise.
2792
- *
2793
- * \details Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
2794
- * The bool result is set to true only if both \p half less-equal comparisons
2795
- * evaluate to true, or false otherwise.
2796
- * NaN inputs generate false results.
2797
- * \param[in] a - half2. Is only being read.
2798
- * \param[in] b - half2. Is only being read.
2799
- *
2800
- * \returns bool
2801
- * - true if both \p half results of less-equal comparison
2802
- * of vectors \p a and \p b are true;
2803
- * - false otherwise.
2804
- * \internal
2805
- * \exception-guarantee no-throw guarantee
2806
- * \behavior reentrant, thread safe
2807
- * \endinternal
2808
- */
2809
- __CUDA_FP16_DECL__ bool __hble2(const __half2 a, const __half2 b);
2810
- /**
2811
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2812
- * \brief Performs \p half2 vector greater-equal comparison and returns boolean
2813
- * true iff both \p half results are true, boolean false otherwise.
2814
- *
2815
- * \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
2816
- * The bool result is set to true only if both \p half greater-equal comparisons
2817
- * evaluate to true, or false otherwise.
2818
- * NaN inputs generate false results.
2819
- * \param[in] a - half2. Is only being read.
2820
- * \param[in] b - half2. Is only being read.
2821
- *
2822
- * \returns bool
2823
- * - true if both \p half results of greater-equal
2824
- * comparison of vectors \p a and \p b are true;
2825
- * - false otherwise.
2826
- * \internal
2827
- * \exception-guarantee no-throw guarantee
2828
- * \behavior reentrant, thread safe
2829
- * \endinternal
2830
- */
2831
- __CUDA_FP16_DECL__ bool __hbge2(const __half2 a, const __half2 b);
2832
- /**
2833
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2834
- * \brief Performs \p half2 vector less-than comparison and returns boolean
2835
- * true iff both \p half results are true, boolean false otherwise.
2836
- *
2837
- * \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
2838
- * The bool result is set to true only if both \p half less-than comparisons
2839
- * evaluate to true, or false otherwise.
2840
- * NaN inputs generate false results.
2841
- * \param[in] a - half2. Is only being read.
2842
- * \param[in] b - half2. Is only being read.
2843
- *
2844
- * \returns bool
2845
- * - true if both \p half results of less-than comparison
2846
- * of vectors \p a and \p b are true;
2847
- * - false otherwise.
2848
- * \internal
2849
- * \exception-guarantee no-throw guarantee
2850
- * \behavior reentrant, thread safe
2851
- * \endinternal
2852
- */
2853
- __CUDA_FP16_DECL__ bool __hblt2(const __half2 a, const __half2 b);
2854
- /**
2855
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2856
- * \brief Performs \p half2 vector greater-than comparison and returns boolean
2857
- * true iff both \p half results are true, boolean false otherwise.
2858
- *
2859
- * \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
2860
- * The bool result is set to true only if both \p half greater-than comparisons
2861
- * evaluate to true, or false otherwise.
2862
- * NaN inputs generate false results.
2863
- * \param[in] a - half2. Is only being read.
2864
- * \param[in] b - half2. Is only being read.
2865
- *
2866
- * \returns bool
2867
- * - true if both \p half results of greater-than
2868
- * comparison of vectors \p a and \p b are true;
2869
- * - false otherwise.
2870
- * \internal
2871
- * \exception-guarantee no-throw guarantee
2872
- * \behavior reentrant, thread safe
2873
- * \endinternal
2874
- */
2875
- __CUDA_FP16_DECL__ bool __hbgt2(const __half2 a, const __half2 b);
2876
- /**
2877
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2878
- * \brief Performs \p half2 vector unordered if-equal comparison and returns
2879
- * boolean true iff both \p half results are true, boolean false otherwise.
2880
- *
2881
- * \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
2882
- * The bool result is set to true only if both \p half if-equal comparisons
2883
- * evaluate to true, or false otherwise.
2884
- * NaN inputs generate true results.
2885
- * \param[in] a - half2. Is only being read.
2886
- * \param[in] b - half2. Is only being read.
2887
- *
2888
- * \returns bool
2889
- * - true if both \p half results of unordered if-equal
2890
- * comparison of vectors \p a and \p b are true;
2891
- * - false otherwise.
2892
- * \internal
2893
- * \exception-guarantee no-throw guarantee
2894
- * \behavior reentrant, thread safe
2895
- * \endinternal
2896
- */
2897
- __CUDA_FP16_DECL__ bool __hbequ2(const __half2 a, const __half2 b);
2898
- /**
2899
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2900
- * \brief Performs \p half2 vector unordered not-equal comparison and returns
2901
- * boolean true iff both \p half results are true, boolean false otherwise.
2902
- *
2903
- * \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
2904
- * The bool result is set to true only if both \p half not-equal comparisons
2905
- * evaluate to true, or false otherwise.
2906
- * NaN inputs generate true results.
2907
- * \param[in] a - half2. Is only being read.
2908
- * \param[in] b - half2. Is only being read.
2909
- *
2910
- * \returns bool
2911
- * - true if both \p half results of unordered not-equal
2912
- * comparison of vectors \p a and \p b are true;
2913
- * - false otherwise.
2914
- * \internal
2915
- * \exception-guarantee no-throw guarantee
2916
- * \behavior reentrant, thread safe
2917
- * \endinternal
2918
- */
2919
- __CUDA_FP16_DECL__ bool __hbneu2(const __half2 a, const __half2 b);
2920
- /**
2921
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2922
- * \brief Performs \p half2 vector unordered less-equal comparison and returns
2923
- * boolean true iff both \p half results are true, boolean false otherwise.
2924
- *
2925
- * \details Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
2926
- * The bool result is set to true only if both \p half less-equal comparisons
2927
- * evaluate to true, or false otherwise.
2928
- * NaN inputs generate true results.
2929
- * \param[in] a - half2. Is only being read.
2930
- * \param[in] b - half2. Is only being read.
2931
- *
2932
- * \returns bool
2933
- * - true if both \p half results of unordered less-equal
2934
- * comparison of vectors \p a and \p b are true;
2935
- * - false otherwise.
2936
- * \internal
2937
- * \exception-guarantee no-throw guarantee
2938
- * \behavior reentrant, thread safe
2939
- * \endinternal
2940
- */
2941
- __CUDA_FP16_DECL__ bool __hbleu2(const __half2 a, const __half2 b);
2942
- /**
2943
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2944
- * \brief Performs \p half2 vector unordered greater-equal comparison and
2945
- * returns boolean true iff both \p half results are true, boolean false
2946
- * otherwise.
2947
- *
2948
- * \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
2949
- * The bool result is set to true only if both \p half greater-equal comparisons
2950
- * evaluate to true, or false otherwise.
2951
- * NaN inputs generate true results.
2952
- * \param[in] a - half2. Is only being read.
2953
- * \param[in] b - half2. Is only being read.
2954
- *
2955
- * \returns bool
2956
- * - true if both \p half results of unordered
2957
- * greater-equal comparison of vectors \p a and \p b are true;
2958
- * - false otherwise.
2959
- * \internal
2960
- * \exception-guarantee no-throw guarantee
2961
- * \behavior reentrant, thread safe
2962
- * \endinternal
2963
- */
2964
- __CUDA_FP16_DECL__ bool __hbgeu2(const __half2 a, const __half2 b);
2965
- /**
2966
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2967
- * \brief Performs \p half2 vector unordered less-than comparison and returns
2968
- * boolean true iff both \p half results are true, boolean false otherwise.
2969
- *
2970
- * \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
2971
- * The bool result is set to true only if both \p half less-than comparisons
2972
- * evaluate to true, or false otherwise.
2973
- * NaN inputs generate true results.
2974
- * \param[in] a - half2. Is only being read.
2975
- * \param[in] b - half2. Is only being read.
2976
- *
2977
- * \returns bool
2978
- * - true if both \p half results of unordered less-than comparison of
2979
- * vectors \p a and \p b are true;
2980
- * - false otherwise.
2981
- * \internal
2982
- * \exception-guarantee no-throw guarantee
2983
- * \behavior reentrant, thread safe
2984
- * \endinternal
2985
- */
2986
- __CUDA_FP16_DECL__ bool __hbltu2(const __half2 a, const __half2 b);
2987
- /**
2988
- * \ingroup CUDA_MATH__HALF2_COMPARISON
2989
- * \brief Performs \p half2 vector unordered greater-than comparison and
2990
- * returns boolean true iff both \p half results are true, boolean false
2991
- * otherwise.
2992
- *
2993
- * \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
2994
- * The bool result is set to true only if both \p half greater-than comparisons
2995
- * evaluate to true, or false otherwise.
2996
- * NaN inputs generate true results.
2997
- * \param[in] a - half2. Is only being read.
2998
- * \param[in] b - half2. Is only being read.
2999
- *
3000
- * \returns bool
3001
- * - true if both \p half results of unordered
3002
- * greater-than comparison of vectors \p a and \p b are true;
3003
- * - false otherwise.
3004
- * \internal
3005
- * \exception-guarantee no-throw guarantee
3006
- * \behavior reentrant, thread safe
3007
- * \endinternal
3008
- */
3009
- __CUDA_FP16_DECL__ bool __hbgtu2(const __half2 a, const __half2 b);
3010
- /**
3011
- * \ingroup CUDA_MATH__HALF_COMPARISON
3012
- * \brief Performs \p half if-equal comparison.
3013
- *
3014
- * \details Performs \p half if-equal comparison of inputs \p a and \p b.
3015
- * NaN inputs generate false results.
3016
- * \param[in] a - half. Is only being read.
3017
- * \param[in] b - half. Is only being read.
3018
- *
3019
- * \returns bool
3020
- * - The boolean result of if-equal comparison of \p a and \p b.
3021
- * \internal
3022
- * \exception-guarantee no-throw guarantee
3023
- * \behavior reentrant, thread safe
3024
- * \endinternal
3025
- */
3026
- __CUDA_FP16_DECL__ bool __heq(const __half a, const __half b);
3027
- /**
3028
- * \ingroup CUDA_MATH__HALF_COMPARISON
3029
- * \brief Performs \p half not-equal comparison.
3030
- *
3031
- * \details Performs \p half not-equal comparison of inputs \p a and \p b.
3032
- * NaN inputs generate false results.
3033
- * \param[in] a - half. Is only being read.
3034
- * \param[in] b - half. Is only being read.
3035
- *
3036
- * \returns bool
3037
- * - The boolean result of not-equal comparison of \p a and \p b.
3038
- * \internal
3039
- * \exception-guarantee no-throw guarantee
3040
- * \behavior reentrant, thread safe
3041
- * \endinternal
3042
- */
3043
- __CUDA_FP16_DECL__ bool __hne(const __half a, const __half b);
3044
- /**
3045
- * \ingroup CUDA_MATH__HALF_COMPARISON
3046
- * \brief Performs \p half less-equal comparison.
3047
- *
3048
- * \details Performs \p half less-equal comparison of inputs \p a and \p b.
3049
- * NaN inputs generate false results.
3050
- * \param[in] a - half. Is only being read.
3051
- * \param[in] b - half. Is only being read.
3052
- *
3053
- * \returns bool
3054
- * - The boolean result of less-equal comparison of \p a and \p b.
3055
- * \internal
3056
- * \exception-guarantee no-throw guarantee
3057
- * \behavior reentrant, thread safe
3058
- * \endinternal
3059
- */
3060
- __CUDA_FP16_DECL__ bool __hle(const __half a, const __half b);
3061
- /**
3062
- * \ingroup CUDA_MATH__HALF_COMPARISON
3063
- * \brief Performs \p half greater-equal comparison.
3064
- *
3065
- * \details Performs \p half greater-equal comparison of inputs \p a and \p b.
3066
- * NaN inputs generate false results.
3067
- * \param[in] a - half. Is only being read.
3068
- * \param[in] b - half. Is only being read.
3069
- *
3070
- * \returns bool
3071
- * - The boolean result of greater-equal comparison of \p a and \p b.
3072
- * \internal
3073
- * \exception-guarantee no-throw guarantee
3074
- * \behavior reentrant, thread safe
3075
- * \endinternal
3076
- */
3077
- __CUDA_FP16_DECL__ bool __hge(const __half a, const __half b);
3078
- /**
3079
- * \ingroup CUDA_MATH__HALF_COMPARISON
3080
- * \brief Performs \p half less-than comparison.
3081
- *
3082
- * \details Performs \p half less-than comparison of inputs \p a and \p b.
3083
- * NaN inputs generate false results.
3084
- * \param[in] a - half. Is only being read.
3085
- * \param[in] b - half. Is only being read.
3086
- *
3087
- * \returns bool
3088
- * - The boolean result of less-than comparison of \p a and \p b.
3089
- * \internal
3090
- * \exception-guarantee no-throw guarantee
3091
- * \behavior reentrant, thread safe
3092
- * \endinternal
3093
- */
3094
- __CUDA_FP16_DECL__ bool __hlt(const __half a, const __half b);
3095
- /**
3096
- * \ingroup CUDA_MATH__HALF_COMPARISON
3097
- * \brief Performs \p half greater-than comparison.
3098
- *
3099
- * \details Performs \p half greater-than comparison of inputs \p a and \p b.
3100
- * NaN inputs generate false results.
3101
- * \param[in] a - half. Is only being read.
3102
- * \param[in] b - half. Is only being read.
3103
- *
3104
- * \returns bool
3105
- * - The boolean result of greater-than comparison of \p a and \p b.
3106
- * \internal
3107
- * \exception-guarantee no-throw guarantee
3108
- * \behavior reentrant, thread safe
3109
- * \endinternal
3110
- */
3111
- __CUDA_FP16_DECL__ bool __hgt(const __half a, const __half b);
3112
- /**
3113
- * \ingroup CUDA_MATH__HALF_COMPARISON
3114
- * \brief Performs \p half unordered if-equal comparison.
3115
- *
3116
- * \details Performs \p half if-equal comparison of inputs \p a and \p b.
3117
- * NaN inputs generate true results.
3118
- * \param[in] a - half. Is only being read.
3119
- * \param[in] b - half. Is only being read.
3120
- *
3121
- * \returns bool
3122
- * - The boolean result of unordered if-equal comparison of \p a and
3123
- * \p b.
3124
- * \internal
3125
- * \exception-guarantee no-throw guarantee
3126
- * \behavior reentrant, thread safe
3127
- * \endinternal
3128
- */
3129
- __CUDA_FP16_DECL__ bool __hequ(const __half a, const __half b);
3130
- /**
3131
- * \ingroup CUDA_MATH__HALF_COMPARISON
3132
- * \brief Performs \p half unordered not-equal comparison.
3133
- *
3134
- * \details Performs \p half not-equal comparison of inputs \p a and \p b.
3135
- * NaN inputs generate true results.
3136
- * \param[in] a - half. Is only being read.
3137
- * \param[in] b - half. Is only being read.
3138
- *
3139
- * \returns bool
3140
- * - The boolean result of unordered not-equal comparison of \p a and
3141
- * \p b.
3142
- * \internal
3143
- * \exception-guarantee no-throw guarantee
3144
- * \behavior reentrant, thread safe
3145
- * \endinternal
3146
- */
3147
- __CUDA_FP16_DECL__ bool __hneu(const __half a, const __half b);
3148
- /**
3149
- * \ingroup CUDA_MATH__HALF_COMPARISON
3150
- * \brief Performs \p half unordered less-equal comparison.
3151
- *
3152
- * \details Performs \p half less-equal comparison of inputs \p a and \p b.
3153
- * NaN inputs generate true results.
3154
- * \param[in] a - half. Is only being read.
3155
- * \param[in] b - half. Is only being read.
3156
- *
3157
- * \returns bool
3158
- * - The boolean result of unordered less-equal comparison of \p a and
3159
- * \p b.
3160
- * \internal
3161
- * \exception-guarantee no-throw guarantee
3162
- * \behavior reentrant, thread safe
3163
- * \endinternal
3164
- */
3165
- __CUDA_FP16_DECL__ bool __hleu(const __half a, const __half b);
3166
- /**
3167
- * \ingroup CUDA_MATH__HALF_COMPARISON
3168
- * \brief Performs \p half unordered greater-equal comparison.
3169
- *
3170
- * \details Performs \p half greater-equal comparison of inputs \p a and \p b.
3171
- * NaN inputs generate true results.
3172
- * \param[in] a - half. Is only being read.
3173
- * \param[in] b - half. Is only being read.
3174
- *
3175
- * \returns bool
3176
- * - The boolean result of unordered greater-equal comparison of \p a
3177
- * and \p b.
3178
- * \internal
3179
- * \exception-guarantee no-throw guarantee
3180
- * \behavior reentrant, thread safe
3181
- * \endinternal
3182
- */
3183
- __CUDA_FP16_DECL__ bool __hgeu(const __half a, const __half b);
3184
- /**
3185
- * \ingroup CUDA_MATH__HALF_COMPARISON
3186
- * \brief Performs \p half unordered less-than comparison.
3187
- *
3188
- * \details Performs \p half less-than comparison of inputs \p a and \p b.
3189
- * NaN inputs generate true results.
3190
- * \param[in] a - half. Is only being read.
3191
- * \param[in] b - half. Is only being read.
3192
- *
3193
- * \returns bool
3194
- * - The boolean result of unordered less-than comparison of \p a and
3195
- * \p b.
3196
- * \internal
3197
- * \exception-guarantee no-throw guarantee
3198
- * \behavior reentrant, thread safe
3199
- * \endinternal
3200
- */
3201
- __CUDA_FP16_DECL__ bool __hltu(const __half a, const __half b);
3202
- /**
3203
- * \ingroup CUDA_MATH__HALF_COMPARISON
3204
- * \brief Performs \p half unordered greater-than comparison.
3205
- *
3206
- * \details Performs \p half greater-than comparison of inputs \p a and \p b.
3207
- * NaN inputs generate true results.
3208
- * \param[in] a - half. Is only being read.
3209
- * \param[in] b - half. Is only being read.
3210
- *
3211
- * \returns bool
3212
- * - The boolean result of unordered greater-than comparison of \p a
3213
- * and \p b.
3214
- * \internal
3215
- * \exception-guarantee no-throw guarantee
3216
- * \behavior reentrant, thread safe
3217
- * \endinternal
3218
- */
3219
- __CUDA_FP16_DECL__ bool __hgtu(const __half a, const __half b);
3220
- /**
3221
- * \ingroup CUDA_MATH__HALF_COMPARISON
3222
- * \brief Determine whether \p half argument is a NaN.
3223
- *
3224
- * \details Determine whether \p half value \p a is a NaN.
3225
- * \param[in] a - half. Is only being read.
3226
- *
3227
- * \returns bool
3228
- * - true iff argument is NaN.
3229
- * \internal
3230
- * \exception-guarantee no-throw guarantee
3231
- * \behavior reentrant, thread safe
3232
- * \endinternal
3233
- */
3234
- __CUDA_FP16_DECL__ bool __hisnan(const __half a);
3235
- #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 800)
3236
- /**
3237
- * \ingroup CUDA_MATH__HALF_COMPARISON
3238
- * \brief Calculates \p half maximum of two input values, NaNs pass through.
3239
- *
3240
- * \details Calculates \p half max(\p a, \p b)
3241
- * defined as (\p a > \p b) ? \p a : \p b.
3242
- * - If either of inputs is NaN, then canonical NaN is returned.
3243
- * - If values of both inputs are 0.0, then +0.0 > -0.0
3244
- * \param[in] a - half. Is only being read.
3245
- * \param[in] b - half. Is only being read.
3246
- *
3247
- * \returns half
3248
- * \internal
3249
- * \exception-guarantee no-throw guarantee
3250
- * \behavior reentrant, thread safe
3251
- * \endinternal
3252
- */
3253
- __CUDA_FP16_DECL__ __half __hmax_nan(const __half a, const __half b);
3254
- /**
3255
- * \ingroup CUDA_MATH__HALF_COMPARISON
3256
- * \brief Calculates \p half minimum of two input values, NaNs pass through.
3257
- *
3258
- * \details Calculates \p half min(\p a, \p b)
3259
- * defined as (\p a < \p b) ? \p a : \p b.
3260
- * - If either of inputs is NaN, then canonical NaN is returned.
3261
- * - If values of both inputs are 0.0, then +0.0 > -0.0
3262
- * \param[in] a - half. Is only being read.
3263
- * \param[in] b - half. Is only being read.
3264
- *
3265
- * \returns half
3266
- * \internal
3267
- * \exception-guarantee no-throw guarantee
3268
- * \behavior reentrant, thread safe
3269
- * \endinternal
3270
- */
3271
- __CUDA_FP16_DECL__ __half __hmin_nan(const __half a, const __half b);
3272
- /**
3273
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
3274
- * \brief Performs \p half fused multiply-add in round-to-nearest-even mode with relu saturation.
3275
- *
3276
- * \details Performs \p half multiply on inputs \p a and \p b,
3277
- * then performs a \p half add of the result with \p c,
3278
- * rounding the result once in round-to-nearest-even mode.
3279
- * Then negative result is clamped to 0.
3280
- * NaN result is converted to canonical NaN.
3281
- * \param[in] a - half. Is only being read.
3282
- * \param[in] b - half. Is only being read.
3283
- * \param[in] c - half. Is only being read.
3284
- *
3285
- * \returns half
3286
- * - The result of fused multiply-add operation on \p
3287
- * a, \p b, and \p c with relu saturation.
3288
- * \internal
3289
- * \exception-guarantee no-throw guarantee
3290
- * \behavior reentrant, thread safe
3291
- * \endinternal
3292
- */
3293
- __CUDA_FP16_DECL__ __half __hfma_relu(const __half a, const __half b, const __half c);
3294
- /**
3295
- * \ingroup CUDA_MATH__HALF2_COMPARISON
3296
- * \brief Calculates \p half2 vector maximum of two inputs, NaNs pass through.
3297
- *
3298
- * \details Calculates \p half2 vector max(\p a, \p b).
3299
- * Elementwise \p half operation is defined as
3300
- * (\p a > \p b) ? \p a : \p b.
3301
- * - If either of inputs is NaN, then canonical NaN is returned.
3302
- * - If values of both inputs are 0.0, then +0.0 > -0.0
3303
- * \param[in] a - half2. Is only being read.
3304
- * \param[in] b - half2. Is only being read.
3305
- *
3306
- * \returns half2
3307
- * - The result of elementwise maximum of vectors \p a and \p b, with NaNs pass through
3308
- * \internal
3309
- * \exception-guarantee no-throw guarantee
3310
- * \behavior reentrant, thread safe
3311
- * \endinternal
3312
- */
3313
- __CUDA_FP16_DECL__ __half2 __hmax2_nan(const __half2 a, const __half2 b);
3314
- /**
3315
- * \ingroup CUDA_MATH__HALF2_COMPARISON
3316
- * \brief Calculates \p half2 vector minimum of two inputs, NaNs pass through.
3317
- *
3318
- * \details Calculates \p half2 vector min(\p a, \p b).
3319
- * Elementwise \p half operation is defined as
3320
- * (\p a < \p b) ? \p a : \p b.
3321
- * - If either of inputs is NaN, then canonical NaN is returned.
3322
- * - If values of both inputs are 0.0, then +0.0 > -0.0
3323
- * \param[in] a - half2. Is only being read.
3324
- * \param[in] b - half2. Is only being read.
3325
- *
3326
- * \returns half2
3327
- * - The result of elementwise minimum of vectors \p a and \p b, with NaNs pass through
3328
- * \internal
3329
- * \exception-guarantee no-throw guarantee
3330
- * \behavior reentrant, thread safe
3331
- * \endinternal
3332
- */
3333
- __CUDA_FP16_DECL__ __half2 __hmin2_nan(const __half2 a, const __half2 b);
3334
- /**
3335
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
3336
- * \brief Performs \p half2 vector fused multiply-add in round-to-nearest-even
3337
- * mode with relu saturation.
3338
- *
3339
- * \details Performs \p half2 vector multiply on inputs \p a and \p b,
3340
- * then performs a \p half2 vector add of the result with \p c,
3341
- * rounding the result once in round-to-nearest-even mode.
3342
- * Then negative result is clamped to 0.
3343
- * NaN result is converted to canonical NaN.
3344
- * \param[in] a - half2. Is only being read.
3345
- * \param[in] b - half2. Is only being read.
3346
- * \param[in] c - half2. Is only being read.
3347
- *
3348
- * \returns half2
3349
- * - The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c with relu saturation.
3350
- * \internal
3351
- * \exception-guarantee no-throw guarantee
3352
- * \behavior reentrant, thread safe
3353
- * \endinternal
3354
- */
3355
- __CUDA_FP16_DECL__ __half2 __hfma2_relu(const __half2 a, const __half2 b, const __half2 c);
3356
- #endif /* !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 800) */
3357
- /**
3358
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
3359
- * \brief Performs fast complex multiply-accumulate
3360
- *
3361
- * \details Interprets vector \p half2 input pairs \p a, \p b, and \p c as
3362
- * complex numbers in \p half precision and performs
3363
- * complex multiply-accumulate operation: a*b + c
3364
- * \param[in] a - half2. Is only being read.
3365
- * \param[in] b - half2. Is only being read.
3366
- * \param[in] c - half2. Is only being read.
3367
- *
3368
- * \returns half2
3369
- * - The result of complex multiply-accumulate operation on complex numbers \p a, \p b, and \p c
3370
- * \internal
3371
- * \exception-guarantee no-throw guarantee
3372
- * \behavior reentrant, thread safe
3373
- * \endinternal
3374
- */
3375
- __CUDA_FP16_DECL__ __half2 __hcmadd(const __half2 a, const __half2 b, const __half2 c);
3376
- /**
3377
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3378
- * \brief Calculates \p half square root in round-to-nearest-even mode.
3379
- *
3380
- * \details Calculates \p half square root of input \p a in round-to-nearest-even mode.
3381
- * \param[in] a - half. Is only being read.
3382
- *
3383
- * \returns half
3384
- * - The square root of \p a.
3385
- * \internal
3386
- * \exception-guarantee no-throw guarantee
3387
- * \behavior reentrant, thread safe
3388
- * \endinternal
3389
- */
3390
- __CUDA_FP16_DECL__ __half hsqrt(const __half a);
3391
- /**
3392
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3393
- * \brief Calculates \p half reciprocal square root in round-to-nearest-even
3394
- * mode.
3395
- *
3396
- * \details Calculates \p half reciprocal square root of input \p a in round-to-nearest-even
3397
- * mode.
3398
- * \param[in] a - half. Is only being read.
3399
- *
3400
- * \returns half
3401
- * - The reciprocal square root of \p a.
3402
- * \internal
3403
- * \exception-guarantee no-throw guarantee
3404
- * \behavior reentrant, thread safe
3405
- * \endinternal
3406
- */
3407
- __CUDA_FP16_DECL__ __half hrsqrt(const __half a);
3408
- /**
3409
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3410
- * \brief Calculates \p half reciprocal in round-to-nearest-even mode.
3411
- *
3412
- * \details Calculates \p half reciprocal of input \p a in round-to-nearest-even mode.
3413
- * \param[in] a - half. Is only being read.
3414
- *
3415
- * \returns half
3416
- * - The reciprocal of \p a.
3417
- * \internal
3418
- * \exception-guarantee no-throw guarantee
3419
- * \behavior reentrant, thread safe
3420
- * \endinternal
3421
- */
3422
- __CUDA_FP16_DECL__ __half hrcp(const __half a);
3423
- /**
3424
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3425
- * \brief Calculates \p half natural logarithm in round-to-nearest-even mode.
3426
- *
3427
- * \details Calculates \p half natural logarithm of input \p a in round-to-nearest-even
3428
- * mode.
3429
- * \param[in] a - half. Is only being read.
3430
- *
3431
- * \returns half
3432
- * - The natural logarithm of \p a.
3433
- * \internal
3434
- * \exception-guarantee no-throw guarantee
3435
- * \behavior reentrant, thread safe
3436
- * \endinternal
3437
- */
3438
- __CUDA_FP16_DECL__ __half hlog(const __half a);
3439
- /**
3440
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3441
- * \brief Calculates \p half binary logarithm in round-to-nearest-even mode.
3442
- *
3443
- * \details Calculates \p half binary logarithm of input \p a in round-to-nearest-even
3444
- * mode.
3445
- * \param[in] a - half. Is only being read.
3446
- *
3447
- * \returns half
3448
- * - The binary logarithm of \p a.
3449
- * \internal
3450
- * \exception-guarantee no-throw guarantee
3451
- * \behavior reentrant, thread safe
3452
- * \endinternal
3453
- */
3454
- __CUDA_FP16_DECL__ __half hlog2(const __half a);
3455
- /**
3456
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3457
- * \brief Calculates \p half decimal logarithm in round-to-nearest-even mode.
3458
- *
3459
- * \details Calculates \p half decimal logarithm of input \p a in round-to-nearest-even
3460
- * mode.
3461
- * \param[in] a - half. Is only being read.
3462
- *
3463
- * \returns half
3464
- * - The decimal logarithm of \p a.
3465
- * \internal
3466
- * \exception-guarantee no-throw guarantee
3467
- * \behavior reentrant, thread safe
3468
- * \endinternal
3469
- */
3470
- __CUDA_FP16_DECL__ __half hlog10(const __half a);
3471
- /**
3472
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3473
- * \brief Calculates \p half natural exponential function in round-to-nearest-even
3474
- * mode.
3475
- *
3476
- * \details Calculates \p half natural exponential function of input \p a in
3477
- * round-to-nearest-even mode.
3478
- * \param[in] a - half. Is only being read.
3479
- *
3480
- * \returns half
3481
- * - The natural exponential function on \p a.
3482
- * \internal
3483
- * \exception-guarantee no-throw guarantee
3484
- * \behavior reentrant, thread safe
3485
- * \endinternal
3486
- */
3487
- __CUDA_FP16_DECL__ __half hexp(const __half a);
3488
- /**
3489
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3490
- * \brief Calculates \p half binary exponential function in round-to-nearest-even
3491
- * mode.
3492
- *
3493
- * \details Calculates \p half binary exponential function of input \p a in
3494
- * round-to-nearest-even mode.
3495
- * \param[in] a - half. Is only being read.
3496
- *
3497
- * \returns half
3498
- * - The binary exponential function on \p a.
3499
- * \internal
3500
- * \exception-guarantee no-throw guarantee
3501
- * \behavior reentrant, thread safe
3502
- * \endinternal
3503
- */
3504
- __CUDA_FP16_DECL__ __half hexp2(const __half a);
3505
- /**
3506
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3507
- * \brief Calculates \p half decimal exponential function in round-to-nearest-even
3508
- * mode.
3509
- *
3510
- * \details Calculates \p half decimal exponential function of input \p a in
3511
- * round-to-nearest-even mode.
3512
- * \param[in] a - half. Is only being read.
3513
- *
3514
- * \returns half
3515
- * - The decimal exponential function on \p a.
3516
- * \internal
3517
- * \exception-guarantee no-throw guarantee
3518
- * \behavior reentrant, thread safe
3519
- * \endinternal
3520
- */
3521
- __CUDA_FP16_DECL__ __half hexp10(const __half a);
3522
- /**
3523
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3524
- * \brief Calculates \p half cosine in round-to-nearest-even mode.
3525
- *
3526
- * \details Calculates \p half cosine of input \p a in round-to-nearest-even mode.
3527
- * \param[in] a - half. Is only being read.
3528
- *
3529
- * \returns half
3530
- * - The cosine of \p a.
3531
- * \internal
3532
- * \exception-guarantee no-throw guarantee
3533
- * \behavior reentrant, thread safe
3534
- * \endinternal
3535
- */
3536
- __CUDA_FP16_DECL__ __half hcos(const __half a);
3537
- /**
3538
- * \ingroup CUDA_MATH__HALF_FUNCTIONS
3539
- * \brief Calculates \p half sine in round-to-nearest-even mode.
3540
- *
3541
- * \details Calculates \p half sine of input \p a in round-to-nearest-even mode.
3542
- * \param[in] a - half. Is only being read.
3543
- *
3544
- * \returns half
3545
- * - The sine of \p a.
3546
- * \internal
3547
- * \exception-guarantee no-throw guarantee
3548
- * \behavior reentrant, thread safe
3549
- * \endinternal
3550
- */
3551
- __CUDA_FP16_DECL__ __half hsin(const __half a);
3552
- /**
3553
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3554
- * \brief Calculates \p half2 vector square root in round-to-nearest-even mode.
3555
- *
3556
- * \details Calculates \p half2 square root of input vector \p a in round-to-nearest-even
3557
- * mode.
3558
- * \param[in] a - half2. Is only being read.
3559
- *
3560
- * \returns half2
3561
- * - The elementwise square root on vector \p a.
3562
- * \internal
3563
- * \exception-guarantee no-throw guarantee
3564
- * \behavior reentrant, thread safe
3565
- * \endinternal
3566
- */
3567
- __CUDA_FP16_DECL__ __half2 h2sqrt(const __half2 a);
3568
- /**
3569
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3570
- * \brief Calculates \p half2 vector reciprocal square root in round-to-nearest-even
3571
- * mode.
3572
- *
3573
- * \details Calculates \p half2 reciprocal square root of input vector \p a in
3574
- * round-to-nearest-even mode.
3575
- * \param[in] a - half2. Is only being read.
3576
- *
3577
- * \returns half2
3578
- * - The elementwise reciprocal square root on vector \p a.
3579
- * \internal
3580
- * \exception-guarantee no-throw guarantee
3581
- * \behavior reentrant, thread safe
3582
- * \endinternal
3583
- */
3584
- __CUDA_FP16_DECL__ __half2 h2rsqrt(const __half2 a);
3585
- /**
3586
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3587
- * \brief Calculates \p half2 vector reciprocal in round-to-nearest-even mode.
3588
- *
3589
- * \details Calculates \p half2 reciprocal of input vector \p a in round-to-nearest-even
3590
- * mode.
3591
- * \param[in] a - half2. Is only being read.
3592
- *
3593
- * \returns half2
3594
- * - The elementwise reciprocal on vector \p a.
3595
- * \internal
3596
- * \exception-guarantee no-throw guarantee
3597
- * \behavior reentrant, thread safe
3598
- * \endinternal
3599
- */
3600
- __CUDA_FP16_DECL__ __half2 h2rcp(const __half2 a);
3601
- /**
3602
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3603
- * \brief Calculates \p half2 vector natural logarithm in round-to-nearest-even
3604
- * mode.
3605
- *
3606
- * \details Calculates \p half2 natural logarithm of input vector \p a in
3607
- * round-to-nearest-even mode.
3608
- * \param[in] a - half2. Is only being read.
3609
- *
3610
- * \returns half2
3611
- * - The elementwise natural logarithm on vector \p a.
3612
- * \internal
3613
- * \exception-guarantee no-throw guarantee
3614
- * \behavior reentrant, thread safe
3615
- * \endinternal
3616
- */
3617
- __CUDA_FP16_DECL__ __half2 h2log(const __half2 a);
3618
- /**
3619
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3620
- * \brief Calculates \p half2 vector binary logarithm in round-to-nearest-even
3621
- * mode.
3622
- *
3623
- * \details Calculates \p half2 binary logarithm of input vector \p a in round-to-nearest-even
3624
- * mode.
3625
- * \param[in] a - half2. Is only being read.
3626
- *
3627
- * \returns half2
3628
- * - The elementwise binary logarithm on vector \p a.
3629
- * \internal
3630
- * \exception-guarantee no-throw guarantee
3631
- * \behavior reentrant, thread safe
3632
- * \endinternal
3633
- */
3634
- __CUDA_FP16_DECL__ __half2 h2log2(const __half2 a);
3635
- /**
3636
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3637
- * \brief Calculates \p half2 vector decimal logarithm in round-to-nearest-even
3638
- * mode.
3639
- *
3640
- * \details Calculates \p half2 decimal logarithm of input vector \p a in
3641
- * round-to-nearest-even mode.
3642
- * \param[in] a - half2. Is only being read.
3643
- *
3644
- * \returns half2
3645
- * - The elementwise decimal logarithm on vector \p a.
3646
- * \internal
3647
- * \exception-guarantee no-throw guarantee
3648
- * \behavior reentrant, thread safe
3649
- * \endinternal
3650
- */
3651
- __CUDA_FP16_DECL__ __half2 h2log10(const __half2 a);
3652
- /**
3653
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3654
- * \brief Calculates \p half2 vector exponential function in round-to-nearest-even
3655
- * mode.
3656
- *
3657
- * \details Calculates \p half2 exponential function of input vector \p a in
3658
- * round-to-nearest-even mode.
3659
- * \param[in] a - half2. Is only being read.
3660
- *
3661
- * \returns half2
3662
- * - The elementwise exponential function on vector \p a.
3663
- * \internal
3664
- * \exception-guarantee no-throw guarantee
3665
- * \behavior reentrant, thread safe
3666
- * \endinternal
3667
- */
3668
- __CUDA_FP16_DECL__ __half2 h2exp(const __half2 a);
3669
- /**
3670
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3671
- * \brief Calculates \p half2 vector binary exponential function in
3672
- * round-to-nearest-even mode.
3673
- *
3674
- * \details Calculates \p half2 binary exponential function of input vector \p a in
3675
- * round-to-nearest-even mode.
3676
- * \param[in] a - half2. Is only being read.
3677
- *
3678
- * \returns half2
3679
- * - The elementwise binary exponential function on vector \p a.
3680
- * \internal
3681
- * \exception-guarantee no-throw guarantee
3682
- * \behavior reentrant, thread safe
3683
- * \endinternal
3684
- */
3685
- __CUDA_FP16_DECL__ __half2 h2exp2(const __half2 a);
3686
- /**
3687
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3688
- * \brief Calculates \p half2 vector decimal exponential function in
3689
- * round-to-nearest-even mode.
3690
- *
3691
- * \details Calculates \p half2 decimal exponential function of input vector \p a in
3692
- * round-to-nearest-even mode.
3693
- * \param[in] a - half2. Is only being read.
3694
- *
3695
- * \returns half2
3696
- * - The elementwise decimal exponential function on vector \p a.
3697
- * \internal
3698
- * \exception-guarantee no-throw guarantee
3699
- * \behavior reentrant, thread safe
3700
- * \endinternal
3701
- */
3702
- __CUDA_FP16_DECL__ __half2 h2exp10(const __half2 a);
3703
- /**
3704
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3705
- * \brief Calculates \p half2 vector cosine in round-to-nearest-even mode.
3706
- *
3707
- * \details Calculates \p half2 cosine of input vector \p a in round-to-nearest-even
3708
- * mode.
3709
- * \param[in] a - half2. Is only being read.
3710
- *
3711
- * \returns half2
3712
- * - The elementwise cosine on vector \p a.
3713
- * \internal
3714
- * \exception-guarantee no-throw guarantee
3715
- * \behavior reentrant, thread safe
3716
- * \endinternal
3717
- */
3718
- __CUDA_FP16_DECL__ __half2 h2cos(const __half2 a);
3719
- /**
3720
- * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3721
- * \brief Calculates \p half2 vector sine in round-to-nearest-even mode.
3722
- *
3723
- * \details Calculates \p half2 sine of input vector \p a in round-to-nearest-even mode.
3724
- * \param[in] a - half2. Is only being read.
3725
- *
3726
- * \returns half2
3727
- * - The elementwise sine on vector \p a.
3728
- * \internal
3729
- * \exception-guarantee no-throw guarantee
3730
- * \behavior reentrant, thread safe
3731
- * \endinternal
3732
- */
3733
- __CUDA_FP16_DECL__ __half2 h2sin(const __half2 a);
3734
-
3735
- #endif /*if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)*/
3736
-
3737
- #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
3738
-
3739
- /**
3740
- * \ingroup CUDA_MATH__HALF2_ARITHMETIC
3741
- * \brief Vector adds \p val to the value stored at \p address in global or shared memory, and writes the
3742
- * result back to \p address. The atomicity of the add operation is guaranteed separately for each of the
3743
- * two __half elements; the entire __half2 is not guaranteed to be atomic as a single 32-bit access.
3744
- *
3745
- * \details The location of \p address must be in global or shared memory. This operation has undefined
3746
- * behavior otherwise. This operation is only supported by devices of compute capability 6.x and higher.
3747
- *
3748
- * \param[in,out] address - half2*. An address in global or shared memory.
3749
- * \param[in] val - half2. The value to be added.
3750
- *
3751
- * \returns half2
3752
- * - The old value read from \p address.
3753
- *
3754
- * \note_ref_guide_atomic
3755
- */
3756
- __CUDA_FP16_DECL__ __half2 atomicAdd(__half2 *const address, const __half2 val);
3757
-
3758
- #endif /*if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)*/
3759
-
3760
- #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)
3761
-
3762
- /**
3763
- * \ingroup CUDA_MATH__HALF_ARITHMETIC
3764
- * \brief Adds \p val to the value stored at \p address in global or shared memory, and writes the result
3765
- * back to \p address. The addition and write-back are performed as a single atomic operation.
3766
- *
3767
- * \details The location of \p address must be in global or shared memory. This operation has undefined
3768
- * behavior otherwise. This operation is only supported by devices of compute capability 7.x and higher.
3769
- *
3770
- * \param[in,out] address - half*. An address in global or shared memory.
3771
- * \param[in] val - half. The value to be added.
3772
- *
3773
- * \returns half
3774
- * - The old value read from \p address.
3775
- *
3776
- * \note_ref_guide_atomic
3777
- */
3778
- __CUDA_FP16_DECL__ __half atomicAdd(__half *const address, const __half val);
3779
-
3780
- #endif /*if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)*/
3781
-
3782
- #endif /* defined(__CUDACC__) */
3783
-
3784
- #undef __CUDA_FP16_DECL__
3785
- #undef __CUDA_HOSTDEVICE_FP16_DECL__
3786
-
3787
- #endif /* defined(__cplusplus) */
3788
-
3789
- /* Note the .hpp file is included even for host-side compilation, to capture the "half" & "half2" definitions */
3790
- #include "cuda_fp16.hpp"
3791
- #undef ___CUDA_FP16_STRINGIFY_INNERMOST
3792
- #undef __CUDA_FP16_STRINGIFY
3793
-
3794
- #endif /* end of include guard: __CUDA_FP16_H__ */