numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -112,33 +112,33 @@
112
112
  /* Forward-declaration of structures defined in "cuda_fp16.hpp" */
113
113
 
114
114
  /**
115
- * \brief half datatype
116
- *
117
- * \details This structure implements the datatype for storing
118
- * half-precision floating-point numbers. The structure implements
119
- * assignment operators and type conversions.
120
- * 16 bits are being used in total: 1 sign bit, 5 bits for the exponent,
121
- * and the significand is being stored in 10 bits.
122
- * The total precision is 11 bits. There are 15361 representable
123
- * numbers within the interval [0.0, 1.0], endpoints included.
124
- * On average we have log10(2**11) ~ 3.311 decimal digits.
125
- *
115
+ * \brief half datatype
116
+ *
117
+ * \details This structure implements the datatype for storing
118
+ * half-precision floating-point numbers. The structure implements
119
+ * assignment operators and type conversions.
120
+ * 16 bits are being used in total: 1 sign bit, 5 bits for the exponent,
121
+ * and the significand is being stored in 10 bits.
122
+ * The total precision is 11 bits. There are 15361 representable
123
+ * numbers within the interval [0.0, 1.0], endpoints included.
124
+ * On average we have log10(2**11) ~ 3.311 decimal digits.
125
+ *
126
126
  * \internal
127
- * \req IEEE 754-2008 compliant implementation of half-precision
128
- * floating-point numbers.
127
+ * \req IEEE 754-2008 compliant implementation of half-precision
128
+ * floating-point numbers.
129
129
  * \endinternal
130
130
  */
131
131
  struct __half;
132
132
 
133
133
  /**
134
134
  * \brief half2 datatype
135
- *
136
- * \details This structure implements the datatype for storing two
137
- * half-precision floating-point numbers.
138
- * The structure implements assignment operators and type conversions.
139
- *
135
+ *
136
+ * \details This structure implements the datatype for storing two
137
+ * half-precision floating-point numbers.
138
+ * The structure implements assignment operators and type conversions.
139
+ *
140
140
  * \internal
141
- * \req Vectorified version of half.
141
+ * \req Vectorified version of half.
142
142
  * \endinternal
143
143
  */
144
144
  struct __half2;
@@ -161,12 +161,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __double2half(const double a);
161
161
  /**
162
162
  * \ingroup CUDA_MATH__HALF_MISC
163
163
  * \brief Converts float number to half precision in round-to-nearest-even mode
164
- * and returns \p half with converted value.
165
- *
166
- * \details Converts float number \p a to half precision in round-to-nearest-even mode.
167
- * \param[in] a - float. Is only being read.
164
+ * and returns \p half with converted value.
165
+ *
166
+ * \details Converts float number \p a to half precision in round-to-nearest-even mode.
167
+ * \param[in] a - float. Is only being read.
168
168
  * \returns half
169
- * \retval a converted to half.
169
+ * \retval a converted to half.
170
170
  * \internal
171
171
  * \exception-guarantee no-throw guarantee
172
172
  * \behavior reentrant, thread safe
@@ -179,9 +179,9 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half(const float a);
179
179
  * and returns \p half with converted value.
180
180
  *
181
181
  * \details Converts float number \p a to half precision in round-to-nearest-even mode.
182
- * \param[in] a - float. Is only being read.
182
+ * \param[in] a - float. Is only being read.
183
183
  * \returns half
184
- * \retval a converted to half.
184
+ * \retval a converted to half.
185
185
  * \internal
186
186
  * \exception-guarantee no-throw guarantee
187
187
  * \behavior reentrant, thread safe
@@ -192,11 +192,11 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rn(const float a);
192
192
  * \ingroup CUDA_MATH__HALF_MISC
193
193
  * \brief Converts float number to half precision in round-towards-zero mode
194
194
  * and returns \p half with converted value.
195
- *
195
+ *
196
196
  * \details Converts float number \p a to half precision in round-towards-zero mode.
197
- * \param[in] a - float. Is only being read.
197
+ * \param[in] a - float. Is only being read.
198
198
  * \returns half
199
- * \retval a converted to half.
199
+ * \retval a converted to half.
200
200
  * \internal
201
201
  * \exception-guarantee no-throw guarantee
202
202
  * \behavior reentrant, thread safe
@@ -207,12 +207,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rz(const float a);
207
207
  * \ingroup CUDA_MATH__HALF_MISC
208
208
  * \brief Converts float number to half precision in round-down mode
209
209
  * and returns \p half with converted value.
210
- *
210
+ *
211
211
  * \details Converts float number \p a to half precision in round-down mode.
212
- * \param[in] a - float. Is only being read.
213
- *
212
+ * \param[in] a - float. Is only being read.
213
+ *
214
214
  * \returns half
215
- * \retval a converted to half.
215
+ * \retval a converted to half.
216
216
  * \internal
217
217
  * \exception-guarantee no-throw guarantee
218
218
  * \behavior reentrant, thread safe
@@ -223,12 +223,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rd(const float a);
223
223
  * \ingroup CUDA_MATH__HALF_MISC
224
224
  * \brief Converts float number to half precision in round-up mode
225
225
  * and returns \p half with converted value.
226
- *
226
+ *
227
227
  * \details Converts float number \p a to half precision in round-up mode.
228
- * \param[in] a - float. Is only being read.
229
- *
228
+ * \param[in] a - float. Is only being read.
229
+ *
230
230
  * \returns half
231
- * \retval a converted to half.
231
+ * \retval a converted to half.
232
232
  * \internal
233
233
  * \exception-guarantee no-throw guarantee
234
234
  * \behavior reentrant, thread safe
@@ -238,12 +238,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_ru(const float a);
238
238
  /**
239
239
  * \ingroup CUDA_MATH__HALF_MISC
240
240
  * \brief Converts \p half number to float.
241
- *
241
+ *
242
242
  * \details Converts half number \p a to float.
243
- * \param[in] a - float. Is only being read.
244
- *
243
+ * \param[in] a - float. Is only being read.
244
+ *
245
245
  * \returns float
246
- * \retval a converted to float.
246
+ * \retval a converted to float.
247
247
  * \internal
248
248
  * \exception-guarantee no-throw guarantee
249
249
  * \behavior reentrant, thread safe
@@ -257,7 +257,7 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float __half2float(const __half a);
257
257
  *
258
258
  * \details Converts input \p a to half precision in round-to-nearest-even mode and
259
259
  * populates both halves of \p half2 with converted value.
260
- * \param[in] a - float. Is only being read.
260
+ * \param[in] a - float. Is only being read.
261
261
  *
262
262
  * \returns half2
263
263
  * \retval The \p half2 value with both halves equal to the converted half
@@ -277,9 +277,9 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __float2half2_rn(const float a);
277
277
  * and combines the results into one \p half2 number. Low 16 bits of the return
278
278
  * value correspond to the input \p a, high 16 bits correspond to the input \p
279
279
  * b.
280
- * \param[in] a - float. Is only being read.
281
- * \param[in] b - float. Is only being read.
282
- *
280
+ * \param[in] a - float. Is only being read.
281
+ * \param[in] b - float. Is only being read.
282
+ *
283
283
  * \returns half2
284
284
  * \retval The \p half2 value with corresponding halves equal to the
285
285
  * converted input floats.
@@ -292,11 +292,11 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __floats2half2_rn(const float a, const flo
292
292
  /**
293
293
  * \ingroup CUDA_MATH__HALF_MISC
294
294
  * \brief Converts low 16 bits of \p half2 to float and returns the result
295
- *
295
+ *
296
296
  * \details Converts low 16 bits of \p half2 input \p a to 32-bit floating-point number
297
297
  * and returns the result.
298
- * \param[in] a - half2. Is only being read.
299
- *
298
+ * \param[in] a - half2. Is only being read.
299
+ *
300
300
  * \returns float
301
301
  * \retval The low 16 bits of \p a converted to float.
302
302
  * \internal
@@ -308,11 +308,11 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float __low2float(const __half2 a);
308
308
  /**
309
309
  * \ingroup CUDA_MATH__HALF_MISC
310
310
  * \brief Converts high 16 bits of \p half2 to float and returns the result
311
- *
311
+ *
312
312
  * \details Converts high 16 bits of \p half2 input \p a to 32-bit floating-point number
313
313
  * and returns the result.
314
- * \param[in] a - half2. Is only being read.
315
- *
314
+ * \param[in] a - half2. Is only being read.
315
+ *
316
316
  * \returns float
317
317
  * \retval The high 16 bits of \p a converted to float.
318
318
  * \internal
@@ -327,13 +327,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float __high2float(const __half2 a);
327
327
  * \ingroup CUDA_MATH__HALF_MISC
328
328
  * \brief Converts both components of float2 number to half precision in
329
329
  * round-to-nearest-even mode and returns \p half2 with converted values.
330
- *
330
+ *
331
331
  * \details Converts both components of float2 to half precision in round-to-nearest
332
332
  * mode and combines the results into one \p half2 number. Low 16 bits of the
333
333
  * return value correspond to \p a.x and high 16 bits of the return value
334
334
  * correspond to \p a.y.
335
- * \param[in] a - float2. Is only being read.
336
- *
335
+ * \param[in] a - float2. Is only being read.
336
+ *
337
337
  * \returns half2
338
338
  * \retval The \p half2 which has corresponding halves equal to the
339
339
  * converted float2 components.
@@ -346,11 +346,11 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __float22half2_rn(const float2 a);
346
346
  /**
347
347
  * \ingroup CUDA_MATH__HALF_MISC
348
348
  * \brief Converts both halves of \p half2 to float2 and returns the result.
349
- *
349
+ *
350
350
  * \details Converts both halves of \p half2 input \p a to float2 and returns the
351
351
  * result.
352
- * \param[in] a - half2. Is only being read.
353
- *
352
+ * \param[in] a - half2. Is only being read.
353
+ *
354
354
  * \returns float2
355
355
  * \retval a converted to float2.
356
356
  * \internal
@@ -362,13 +362,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float2 __half22float2(const __half2 a);
362
362
  /**
363
363
  * \ingroup CUDA_MATH__HALF_MISC
364
364
  * \brief Convert a half to a signed integer in round-to-nearest-even mode.
365
- *
365
+ *
366
366
  * \details Convert the half-precision floating-point value \p h to a signed integer in
367
367
  * round-to-nearest-even mode.
368
- * \param[in] h - half. Is only being read.
369
- *
368
+ * \param[in] h - half. Is only being read.
369
+ *
370
370
  * \returns int
371
- * \retval h converted to a signed integer.
371
+ * \retval h converted to a signed integer.
372
372
  * \internal
373
373
  * \exception-guarantee no-throw guarantee
374
374
  * \behavior reentrant, thread safe
@@ -378,13 +378,13 @@ __CUDA_FP16_DECL__ int __half2int_rn(const __half h);
378
378
  /**
379
379
  * \ingroup CUDA_MATH__HALF_MISC
380
380
  * \brief Convert a half to a signed integer in round-towards-zero mode.
381
- *
381
+ *
382
382
  * \details Convert the half-precision floating-point value \p h to a signed integer in
383
383
  * round-towards-zero mode.
384
- * \param[in] h - half. Is only being read.
385
- *
384
+ * \param[in] h - half. Is only being read.
385
+ *
386
386
  * \returns int
387
- * \retval h converted to a signed integer.
387
+ * \retval h converted to a signed integer.
388
388
  * \internal
389
389
  * \exception-guarantee no-throw guarantee
390
390
  * \behavior reentrant, thread safe
@@ -394,13 +394,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ int __half2int_rz(const __half h);
394
394
  /**
395
395
  * \ingroup CUDA_MATH__HALF_MISC
396
396
  * \brief Convert a half to a signed integer in round-down mode.
397
- *
397
+ *
398
398
  * \details Convert the half-precision floating-point value \p h to a signed integer in
399
399
  * round-down mode.
400
- * \param[in] h - half. Is only being read.
401
- *
400
+ * \param[in] h - half. Is only being read.
401
+ *
402
402
  * \returns int
403
- * \retval h converted to a signed integer.
403
+ * \retval h converted to a signed integer.
404
404
  * \internal
405
405
  * \exception-guarantee no-throw guarantee
406
406
  * \behavior reentrant, thread safe
@@ -410,13 +410,13 @@ __CUDA_FP16_DECL__ int __half2int_rd(const __half h);
410
410
  /**
411
411
  * \ingroup CUDA_MATH__HALF_MISC
412
412
  * \brief Convert a half to a signed integer in round-up mode.
413
- *
413
+ *
414
414
  * \details Convert the half-precision floating-point value \p h to a signed integer in
415
415
  * round-up mode.
416
- * \param[in] h - half. Is only being read.
417
- *
416
+ * \param[in] h - half. Is only being read.
417
+ *
418
418
  * \returns int
419
- * \retval h converted to a signed integer.
419
+ * \retval h converted to a signed integer.
420
420
  * \internal
421
421
  * \exception-guarantee no-throw guarantee
422
422
  * \behavior reentrant, thread safe
@@ -427,13 +427,13 @@ __CUDA_FP16_DECL__ int __half2int_ru(const __half h);
427
427
  /**
428
428
  * \ingroup CUDA_MATH__HALF_MISC
429
429
  * \brief Convert a signed integer to a half in round-to-nearest-even mode.
430
- *
430
+ *
431
431
  * \details Convert the signed integer value \p i to a half-precision floating-point
432
432
  * value in round-to-nearest-even mode.
433
- * \param[in] i - int. Is only being read.
434
- *
433
+ * \param[in] i - int. Is only being read.
434
+ *
435
435
  * \returns half
436
- * \retval i converted to half.
436
+ * \retval i converted to half.
437
437
  * \internal
438
438
  * \exception-guarantee no-throw guarantee
439
439
  * \behavior reentrant, thread safe
@@ -443,13 +443,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __int2half_rn(const int i);
443
443
  /**
444
444
  * \ingroup CUDA_MATH__HALF_MISC
445
445
  * \brief Convert a signed integer to a half in round-towards-zero mode.
446
- *
446
+ *
447
447
  * \details Convert the signed integer value \p i to a half-precision floating-point
448
448
  * value in round-towards-zero mode.
449
- * \param[in] i - int. Is only being read.
450
- *
449
+ * \param[in] i - int. Is only being read.
450
+ *
451
451
  * \returns half
452
- * \retval i converted to half.
452
+ * \retval i converted to half.
453
453
  * \internal
454
454
  * \exception-guarantee no-throw guarantee
455
455
  * \behavior reentrant, thread safe
@@ -459,13 +459,13 @@ __CUDA_FP16_DECL__ __half __int2half_rz(const int i);
459
459
  /**
460
460
  * \ingroup CUDA_MATH__HALF_MISC
461
461
  * \brief Convert a signed integer to a half in round-down mode.
462
- *
462
+ *
463
463
  * \details Convert the signed integer value \p i to a half-precision floating-point
464
464
  * value in round-down mode.
465
- * \param[in] i - int. Is only being read.
466
- *
465
+ * \param[in] i - int. Is only being read.
466
+ *
467
467
  * \returns half
468
- * \retval i converted to half.
468
+ * \retval i converted to half.
469
469
  * \internal
470
470
  * \exception-guarantee no-throw guarantee
471
471
  * \behavior reentrant, thread safe
@@ -475,13 +475,13 @@ __CUDA_FP16_DECL__ __half __int2half_rd(const int i);
475
475
  /**
476
476
  * \ingroup CUDA_MATH__HALF_MISC
477
477
  * \brief Convert a signed integer to a half in round-up mode.
478
- *
478
+ *
479
479
  * \details Convert the signed integer value \p i to a half-precision floating-point
480
480
  * value in round-up mode.
481
- * \param[in] i - int. Is only being read.
482
- *
481
+ * \param[in] i - int. Is only being read.
482
+ *
483
483
  * \returns half
484
- * \retval i converted to half.
484
+ * \retval i converted to half.
485
485
  * \internal
486
486
  * \exception-guarantee no-throw guarantee
487
487
  * \behavior reentrant, thread safe
@@ -493,13 +493,13 @@ __CUDA_FP16_DECL__ __half __int2half_ru(const int i);
493
493
  * \ingroup CUDA_MATH__HALF_MISC
494
494
  * \brief Convert a half to a signed short integer in round-to-nearest-even
495
495
  * mode.
496
- *
496
+ *
497
497
  * \details Convert the half-precision floating-point value \p h to a signed short
498
498
  * integer in round-to-nearest-even mode.
499
- * \param[in] h - half. Is only being read.
500
- *
499
+ * \param[in] h - half. Is only being read.
500
+ *
501
501
  * \returns short int
502
- * \retval h converted to a signed short integer.
502
+ * \retval h converted to a signed short integer.
503
503
  * \internal
504
504
  * \exception-guarantee no-throw guarantee
505
505
  * \behavior reentrant, thread safe
@@ -509,13 +509,13 @@ __CUDA_FP16_DECL__ short int __half2short_rn(const __half h);
509
509
  /**
510
510
  * \ingroup CUDA_MATH__HALF_MISC
511
511
  * \brief Convert a half to a signed short integer in round-towards-zero mode.
512
- *
512
+ *
513
513
  * \details Convert the half-precision floating-point value \p h to a signed short
514
514
  * integer in round-towards-zero mode.
515
- * \param[in] h - half. Is only being read.
516
- *
515
+ * \param[in] h - half. Is only being read.
516
+ *
517
517
  * \returns short int
518
- * \retval h converted to a signed short integer.
518
+ * \retval h converted to a signed short integer.
519
519
  * \internal
520
520
  * \exception-guarantee no-throw guarantee
521
521
  * \behavior reentrant, thread safe
@@ -525,13 +525,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ short int __half2short_rz(const __half h);
525
525
  /**
526
526
  * \ingroup CUDA_MATH__HALF_MISC
527
527
  * \brief Convert a half to a signed short integer in round-down mode.
528
- *
528
+ *
529
529
  * \details Convert the half-precision floating-point value \p h to a signed short
530
530
  * integer in round-down mode.
531
- * \param[in] h - half. Is only being read.
532
- *
531
+ * \param[in] h - half. Is only being read.
532
+ *
533
533
  * \returns short int
534
- * \retval h converted to a signed short integer.
534
+ * \retval h converted to a signed short integer.
535
535
  * \internal
536
536
  * \exception-guarantee no-throw guarantee
537
537
  * \behavior reentrant, thread safe
@@ -541,13 +541,13 @@ __CUDA_FP16_DECL__ short int __half2short_rd(const __half h);
541
541
  /**
542
542
  * \ingroup CUDA_MATH__HALF_MISC
543
543
  * \brief Convert a half to a signed short integer in round-up mode.
544
- *
544
+ *
545
545
  * \details Convert the half-precision floating-point value \p h to a signed short
546
546
  * integer in round-up mode.
547
- * \param[in] h - half. Is only being read.
548
- *
547
+ * \param[in] h - half. Is only being read.
548
+ *
549
549
  * \returns short int
550
- * \retval h converted to a signed short integer.
550
+ * \retval h converted to a signed short integer.
551
551
  * \internal
552
552
  * \exception-guarantee no-throw guarantee
553
553
  * \behavior reentrant, thread safe
@@ -559,13 +559,13 @@ __CUDA_FP16_DECL__ short int __half2short_ru(const __half h);
559
559
  * \ingroup CUDA_MATH__HALF_MISC
560
560
  * \brief Convert a signed short integer to a half in round-to-nearest-even
561
561
  * mode.
562
- *
562
+ *
563
563
  * \details Convert the signed short integer value \p i to a half-precision floating-point
564
564
  * value in round-to-nearest-even mode.
565
- * \param[in] i - short int. Is only being read.
566
- *
565
+ * \param[in] i - short int. Is only being read.
566
+ *
567
567
  * \returns half
568
- * \retval i converted to half.
568
+ * \retval i converted to half.
569
569
  * \internal
570
570
  * \exception-guarantee no-throw guarantee
571
571
  * \behavior reentrant, thread safe
@@ -575,13 +575,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __short2half_rn(const short int i);
575
575
  /**
576
576
  * \ingroup CUDA_MATH__HALF_MISC
577
577
  * \brief Convert a signed short integer to a half in round-towards-zero mode.
578
- *
578
+ *
579
579
  * \details Convert the signed short integer value \p i to a half-precision floating-point
580
580
  * value in round-towards-zero mode.
581
- * \param[in] i - short int. Is only being read.
582
- *
581
+ * \param[in] i - short int. Is only being read.
582
+ *
583
583
  * \returns half
584
- * \retval i converted to half.
584
+ * \retval i converted to half.
585
585
  * \internal
586
586
  * \exception-guarantee no-throw guarantee
587
587
  * \behavior reentrant, thread safe
@@ -591,13 +591,13 @@ __CUDA_FP16_DECL__ __half __short2half_rz(const short int i);
591
591
  /**
592
592
  * \ingroup CUDA_MATH__HALF_MISC
593
593
  * \brief Convert a signed short integer to a half in round-down mode.
594
- *
594
+ *
595
595
  * \details Convert the signed short integer value \p i to a half-precision floating-point
596
596
  * value in round-down mode.
597
- * \param[in] i - short int. Is only being read.
598
- *
597
+ * \param[in] i - short int. Is only being read.
598
+ *
599
599
  * \returns half
600
- * \retval i converted to half.
600
+ * \retval i converted to half.
601
601
  * \internal
602
602
  * \exception-guarantee no-throw guarantee
603
603
  * \behavior reentrant, thread safe
@@ -607,13 +607,13 @@ __CUDA_FP16_DECL__ __half __short2half_rd(const short int i);
607
607
  /**
608
608
  * \ingroup CUDA_MATH__HALF_MISC
609
609
  * \brief Convert a signed short integer to a half in round-up mode.
610
- *
610
+ *
611
611
  * \details Convert the signed short integer value \p i to a half-precision floating-point
612
612
  * value in round-up mode.
613
- * \param[in] i - short int. Is only being read.
614
- *
613
+ * \param[in] i - short int. Is only being read.
614
+ *
615
615
  * \returns half
616
- * \retval i converted to half.
616
+ * \retval i converted to half.
617
617
  * \internal
618
618
  * \exception-guarantee no-throw guarantee
619
619
  * \behavior reentrant, thread safe
@@ -624,13 +624,13 @@ __CUDA_FP16_DECL__ __half __short2half_ru(const short int i);
624
624
  /**
625
625
  * \ingroup CUDA_MATH__HALF_MISC
626
626
  * \brief Convert a half to an unsigned integer in round-to-nearest-even mode.
627
- *
627
+ *
628
628
  * \details Convert the half-precision floating-point value \p h to an unsigned integer
629
629
  * in round-to-nearest-even mode.
630
- * \param[in] h - half. Is only being read.
631
- *
630
+ * \param[in] h - half. Is only being read.
631
+ *
632
632
  * \returns unsigned int
633
- * \retval h converted to an unsigned integer.
633
+ * \retval h converted to an unsigned integer.
634
634
  * \internal
635
635
  * \exception-guarantee no-throw guarantee
636
636
  * \behavior reentrant, thread safe
@@ -640,13 +640,13 @@ __CUDA_FP16_DECL__ unsigned int __half2uint_rn(const __half h);
640
640
  /**
641
641
  * \ingroup CUDA_MATH__HALF_MISC
642
642
  * \brief Convert a half to an unsigned integer in round-towards-zero mode.
643
- *
643
+ *
644
644
  * \details Convert the half-precision floating-point value \p h to an unsigned integer
645
645
  * in round-towards-zero mode.
646
- * \param[in] h - half. Is only being read.
647
- *
646
+ * \param[in] h - half. Is only being read.
647
+ *
648
648
  * \returns unsigned int
649
- * \retval h converted to an unsigned integer.
649
+ * \retval h converted to an unsigned integer.
650
650
  * \internal
651
651
  * \exception-guarantee no-throw guarantee
652
652
  * \behavior reentrant, thread safe
@@ -659,10 +659,10 @@ __CUDA_HOSTDEVICE_FP16_DECL__ unsigned int __half2uint_rz(const __half h);
659
659
  *
660
660
  * \details Convert the half-precision floating-point value \p h to an unsigned integer
661
661
  * in round-down mode.
662
- * \param[in] h - half. Is only being read.
662
+ * \param[in] h - half. Is only being read.
663
663
  *
664
664
  * \returns unsigned int
665
- * \retval h converted to an unsigned integer.
665
+ * \retval h converted to an unsigned integer.
666
666
  * \internal
667
667
  * \exception-guarantee no-throw guarantee
668
668
  * \behavior reentrant, thread safe
@@ -675,10 +675,10 @@ __CUDA_FP16_DECL__ unsigned int __half2uint_rd(const __half h);
675
675
  *
676
676
  * \details Convert the half-precision floating-point value \p h to an unsigned integer
677
677
  * in round-up mode.
678
- * \param[in] h - half. Is only being read.
678
+ * \param[in] h - half. Is only being read.
679
679
  *
680
680
  * \returns unsigned int
681
- * \retval h converted to an unsigned integer.
681
+ * \retval h converted to an unsigned integer.
682
682
  * \internal
683
683
  * \exception-guarantee no-throw guarantee
684
684
  * \behavior reentrant, thread safe
@@ -689,13 +689,13 @@ __CUDA_FP16_DECL__ unsigned int __half2uint_ru(const __half h);
689
689
  /**
690
690
  * \ingroup CUDA_MATH__HALF_MISC
691
691
  * \brief Convert an unsigned integer to a half in round-to-nearest-even mode.
692
- *
692
+ *
693
693
  * \details Convert the unsigned integer value \p i to a half-precision floating-point
694
694
  * value in round-to-nearest-even mode.
695
- * \param[in] i - unsigned int. Is only being read.
696
- *
695
+ * \param[in] i - unsigned int. Is only being read.
696
+ *
697
697
  * \returns half
698
- * \retval i converted to half.
698
+ * \retval i converted to half.
699
699
  * \internal
700
700
  * \exception-guarantee no-throw guarantee
701
701
  * \behavior reentrant, thread safe
@@ -705,13 +705,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __uint2half_rn(const unsigned int i);
705
705
  /**
706
706
  * \ingroup CUDA_MATH__HALF_MISC
707
707
  * \brief Convert an unsigned integer to a half in round-towards-zero mode.
708
- *
708
+ *
709
709
  * \details Convert the unsigned integer value \p i to a half-precision floating-point
710
710
  * value in round-towards-zero mode.
711
- * \param[in] i - unsigned int. Is only being read.
712
- *
711
+ * \param[in] i - unsigned int. Is only being read.
712
+ *
713
713
  * \returns half
714
- * \retval i converted to half.
714
+ * \retval i converted to half.
715
715
  * \internal
716
716
  * \exception-guarantee no-throw guarantee
717
717
  * \behavior reentrant, thread safe
@@ -721,13 +721,13 @@ __CUDA_FP16_DECL__ __half __uint2half_rz(const unsigned int i);
721
721
  /**
722
722
  * \ingroup CUDA_MATH__HALF_MISC
723
723
  * \brief Convert an unsigned integer to a half in round-down mode.
724
- *
724
+ *
725
725
  * \details Convert the unsigned integer value \p i to a half-precision floating-point
726
726
  * value in round-down mode.
727
- * \param[in] i - unsigned int. Is only being read.
728
- *
727
+ * \param[in] i - unsigned int. Is only being read.
728
+ *
729
729
  * \returns half
730
- * \retval i converted to half.
730
+ * \retval i converted to half.
731
731
  * \internal
732
732
  * \exception-guarantee no-throw guarantee
733
733
  * \behavior reentrant, thread safe
@@ -737,13 +737,13 @@ __CUDA_FP16_DECL__ __half __uint2half_rd(const unsigned int i);
737
737
  /**
738
738
  * \ingroup CUDA_MATH__HALF_MISC
739
739
  * \brief Convert an unsigned integer to a half in round-up mode.
740
- *
740
+ *
741
741
  * \details Convert the unsigned integer value \p i to a half-precision floating-point
742
742
  * value in round-up mode.
743
- * \param[in] i - unsigned int. Is only being read.
744
- *
743
+ * \param[in] i - unsigned int. Is only being read.
744
+ *
745
745
  * \returns half
746
- * \retval i converted to half.
746
+ * \retval i converted to half.
747
747
  * \internal
748
748
  * \exception-guarantee no-throw guarantee
749
749
  * \behavior reentrant, thread safe
@@ -755,13 +755,13 @@ __CUDA_FP16_DECL__ __half __uint2half_ru(const unsigned int i);
755
755
  * \ingroup CUDA_MATH__HALF_MISC
756
756
  * \brief Convert a half to an unsigned short integer in round-to-nearest-even
757
757
  * mode.
758
- *
758
+ *
759
759
  * \details Convert the half-precision floating-point value \p h to an unsigned short
760
760
  * integer in round-to-nearest-even mode.
761
- * \param[in] h - half. Is only being read.
762
- *
761
+ * \param[in] h - half. Is only being read.
762
+ *
763
763
  * \returns unsigned short int
764
- * \retval h converted to an unsigned short integer.
764
+ * \retval h converted to an unsigned short integer.
765
765
  * \internal
766
766
  * \exception-guarantee no-throw guarantee
767
767
  * \behavior reentrant, thread safe
@@ -772,13 +772,13 @@ __CUDA_FP16_DECL__ unsigned short int __half2ushort_rn(const __half h);
772
772
  * \ingroup CUDA_MATH__HALF_MISC
773
773
  * \brief Convert a half to an unsigned short integer in round-towards-zero
774
774
  * mode.
775
- *
775
+ *
776
776
  * \details Convert the half-precision floating-point value \p h to an unsigned short
777
777
  * integer in round-towards-zero mode.
778
- * \param[in] h - half. Is only being read.
779
- *
778
+ * \param[in] h - half. Is only being read.
779
+ *
780
780
  * \returns unsigned short int
781
- * \retval h converted to an unsigned short integer.
781
+ * \retval h converted to an unsigned short integer.
782
782
  * \internal
783
783
  * \exception-guarantee no-throw guarantee
784
784
  * \behavior reentrant, thread safe
@@ -788,25 +788,25 @@ __CUDA_HOSTDEVICE_FP16_DECL__ unsigned short int __half2ushort_rz(const __half h
788
788
  /**
789
789
  * \ingroup CUDA_MATH__HALF_MISC
790
790
  * \brief Convert a half to an unsigned short integer in round-down mode.
791
- *
791
+ *
792
792
  * \details Convert the half-precision floating-point value \p h to an unsigned short
793
793
  * integer in round-down mode.
794
- * \param[in] h - half. Is only being read.
795
- *
794
+ * \param[in] h - half. Is only being read.
795
+ *
796
796
  * \returns unsigned short int
797
- * \retval h converted to an unsigned short integer.
797
+ * \retval h converted to an unsigned short integer.
798
798
  */
799
799
  __CUDA_FP16_DECL__ unsigned short int __half2ushort_rd(const __half h);
800
800
  /**
801
801
  * \ingroup CUDA_MATH__HALF_MISC
802
802
  * \brief Convert a half to an unsigned short integer in round-up mode.
803
- *
803
+ *
804
804
  * \details Convert the half-precision floating-point value \p h to an unsigned short
805
805
  * integer in round-up mode.
806
- * \param[in] h - half. Is only being read.
807
- *
806
+ * \param[in] h - half. Is only being read.
807
+ *
808
808
  * \returns unsigned short int
809
- * \retval h converted to an unsigned short integer.
809
+ * \retval h converted to an unsigned short integer.
810
810
  */
811
811
  __CUDA_FP16_DECL__ unsigned short int __half2ushort_ru(const __half h);
812
812
 
@@ -814,13 +814,13 @@ __CUDA_FP16_DECL__ unsigned short int __half2ushort_ru(const __half h);
814
814
  * \ingroup CUDA_MATH__HALF_MISC
815
815
  * \brief Convert an unsigned short integer to a half in round-to-nearest-even
816
816
  * mode.
817
- *
817
+ *
818
818
  * \details Convert the unsigned short integer value \p i to a half-precision floating-point
819
819
  * value in round-to-nearest-even mode.
820
- * \param[in] i - unsigned short int. Is only being read.
821
- *
820
+ * \param[in] i - unsigned short int. Is only being read.
821
+ *
822
822
  * \returns half
823
- * \retval i converted to half.
823
+ * \retval i converted to half.
824
824
  * \internal
825
825
  * \exception-guarantee no-throw guarantee
826
826
  * \behavior reentrant, thread safe
@@ -831,13 +831,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __ushort2half_rn(const unsigned short int i
831
831
  * \ingroup CUDA_MATH__HALF_MISC
832
832
  * \brief Convert an unsigned short integer to a half in round-towards-zero
833
833
  * mode.
834
- *
834
+ *
835
835
  * \details Convert the unsigned short integer value \p i to a half-precision floating-point
836
836
  * value in round-towards-zero mode.
837
- * \param[in] i - unsigned short int. Is only being read.
838
- *
837
+ * \param[in] i - unsigned short int. Is only being read.
838
+ *
839
839
  * \returns half
840
- * \retval i converted to half.
840
+ * \retval i converted to half.
841
841
  * \internal
842
842
  * \exception-guarantee no-throw guarantee
843
843
  * \behavior reentrant, thread safe
@@ -847,13 +847,13 @@ __CUDA_FP16_DECL__ __half __ushort2half_rz(const unsigned short int i);
847
847
  /**
848
848
  * \ingroup CUDA_MATH__HALF_MISC
849
849
  * \brief Convert an unsigned short integer to a half in round-down mode.
850
- *
850
+ *
851
851
  * \details Convert the unsigned short integer value \p i to a half-precision floating-point
852
852
  * value in round-down mode.
853
- * \param[in] i - unsigned short int. Is only being read.
854
- *
853
+ * \param[in] i - unsigned short int. Is only being read.
854
+ *
855
855
  * \returns half
856
- * \retval i converted to half.
856
+ * \retval i converted to half.
857
857
  * \internal
858
858
  * \exception-guarantee no-throw guarantee
859
859
  * \behavior reentrant, thread safe
@@ -863,13 +863,13 @@ __CUDA_FP16_DECL__ __half __ushort2half_rd(const unsigned short int i);
863
863
  /**
864
864
  * \ingroup CUDA_MATH__HALF_MISC
865
865
  * \brief Convert an unsigned short integer to a half in round-up mode.
866
- *
866
+ *
867
867
  * \details Convert the unsigned short integer value \p i to a half-precision floating-point
868
868
  * value in round-up mode.
869
- * \param[in] i - unsigned short int. Is only being read.
870
- *
869
+ * \param[in] i - unsigned short int. Is only being read.
870
+ *
871
871
  * \returns half
872
- * \retval i converted to half.
872
+ * \retval i converted to half.
873
873
  * \internal
874
874
  * \exception-guarantee no-throw guarantee
875
875
  * \behavior reentrant, thread safe
@@ -881,13 +881,13 @@ __CUDA_FP16_DECL__ __half __ushort2half_ru(const unsigned short int i);
881
881
  * \ingroup CUDA_MATH__HALF_MISC
882
882
  * \brief Convert a half to an unsigned 64-bit integer in round-to-nearest-even
883
883
  * mode.
884
- *
884
+ *
885
885
  * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
886
886
  * integer in round-to-nearest-even mode.
887
- * \param[in] h - half. Is only being read.
888
- *
887
+ * \param[in] h - half. Is only being read.
888
+ *
889
889
  * \returns unsigned long long int
890
- * \retval h converted to an unsigned 64-bit integer.
890
+ * \retval h converted to an unsigned 64-bit integer.
891
891
  * \internal
892
892
  * \exception-guarantee no-throw guarantee
893
893
  * \behavior reentrant, thread safe
@@ -898,13 +898,13 @@ __CUDA_FP16_DECL__ unsigned long long int __half2ull_rn(const __half h);
898
898
  * \ingroup CUDA_MATH__HALF_MISC
899
899
  * \brief Convert a half to an unsigned 64-bit integer in round-towards-zero
900
900
  * mode.
901
- *
901
+ *
902
902
  * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
903
903
  * integer in round-towards-zero mode.
904
- * \param[in] h - half. Is only being read.
905
- *
904
+ * \param[in] h - half. Is only being read.
905
+ *
906
906
  * \returns unsigned long long int
907
- * \retval h converted to an unsigned 64-bit integer.
907
+ * \retval h converted to an unsigned 64-bit integer.
908
908
  * \internal
909
909
  * \exception-guarantee no-throw guarantee
910
910
  * \behavior reentrant, thread safe
@@ -914,13 +914,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ unsigned long long int __half2ull_rz(const __half
914
914
  /**
915
915
  * \ingroup CUDA_MATH__HALF_MISC
916
916
  * \brief Convert a half to an unsigned 64-bit integer in round-down mode.
917
- *
917
+ *
918
918
  * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
919
919
  * integer in round-down mode.
920
- * \param[in] h - half. Is only being read.
921
- *
920
+ * \param[in] h - half. Is only being read.
921
+ *
922
922
  * \returns unsigned long long int
923
- * \retval h converted to an unsigned 64-bit integer.
923
+ * \retval h converted to an unsigned 64-bit integer.
924
924
  * \internal
925
925
  * \exception-guarantee no-throw guarantee
926
926
  * \behavior reentrant, thread safe
@@ -930,13 +930,13 @@ __CUDA_FP16_DECL__ unsigned long long int __half2ull_rd(const __half h);
930
930
  /**
931
931
  * \ingroup CUDA_MATH__HALF_MISC
932
932
  * \brief Convert a half to an unsigned 64-bit integer in round-up mode.
933
- *
933
+ *
934
934
  * \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
935
935
  * integer in round-up mode.
936
- * \param[in] h - half. Is only being read.
937
- *
936
+ * \param[in] h - half. Is only being read.
937
+ *
938
938
  * \returns unsigned long long int
939
- * \retval h converted to an unsigned 64-bit integer.
939
+ * \retval h converted to an unsigned 64-bit integer.
940
940
  * \internal
941
941
  * \exception-guarantee no-throw guarantee
942
942
  * \behavior reentrant, thread safe
@@ -948,13 +948,13 @@ __CUDA_FP16_DECL__ unsigned long long int __half2ull_ru(const __half h);
948
948
  * \ingroup CUDA_MATH__HALF_MISC
949
949
  * \brief Convert an unsigned 64-bit integer to a half in round-to-nearest-even
950
950
  * mode.
951
- *
951
+ *
952
952
  * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
953
953
  * value in round-to-nearest-even mode.
954
- * \param[in] i - unsigned long long int. Is only being read.
955
- *
954
+ * \param[in] i - unsigned long long int. Is only being read.
955
+ *
956
956
  * \returns half
957
- * \retval i converted to half.
957
+ * \retval i converted to half.
958
958
  * \internal
959
959
  * \exception-guarantee no-throw guarantee
960
960
  * \behavior reentrant, thread safe
@@ -965,13 +965,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __ull2half_rn(const unsigned long long int
965
965
  * \ingroup CUDA_MATH__HALF_MISC
966
966
  * \brief Convert an unsigned 64-bit integer to a half in round-towards-zero
967
967
  * mode.
968
- *
968
+ *
969
969
  * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
970
970
  * value in round-towards-zero mode.
971
- * \param[in] i - unsigned long long int. Is only being read.
972
- *
971
+ * \param[in] i - unsigned long long int. Is only being read.
972
+ *
973
973
  * \returns half
974
- * \retval i converted to half.
974
+ * \retval i converted to half.
975
975
  * \internal
976
976
  * \exception-guarantee no-throw guarantee
977
977
  * \behavior reentrant, thread safe
@@ -981,13 +981,13 @@ __CUDA_FP16_DECL__ __half __ull2half_rz(const unsigned long long int i);
981
981
  /**
982
982
  * \ingroup CUDA_MATH__HALF_MISC
983
983
  * \brief Convert an unsigned 64-bit integer to a half in round-down mode.
984
- *
984
+ *
985
985
  * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
986
986
  * value in round-down mode.
987
- * \param[in] i - unsigned long long int. Is only being read.
988
- *
987
+ * \param[in] i - unsigned long long int. Is only being read.
988
+ *
989
989
  * \returns half
990
- * \retval i converted to half.
990
+ * \retval i converted to half.
991
991
  * \internal
992
992
  * \exception-guarantee no-throw guarantee
993
993
  * \behavior reentrant, thread safe
@@ -997,13 +997,13 @@ __CUDA_FP16_DECL__ __half __ull2half_rd(const unsigned long long int i);
997
997
  /**
998
998
  * \ingroup CUDA_MATH__HALF_MISC
999
999
  * \brief Convert an unsigned 64-bit integer to a half in round-up mode.
1000
- *
1000
+ *
1001
1001
  * \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
1002
1002
  * value in round-up mode.
1003
- * \param[in] i - unsigned long long int. Is only being read.
1004
- *
1003
+ * \param[in] i - unsigned long long int. Is only being read.
1004
+ *
1005
1005
  * \returns half
1006
- * \retval i converted to half.
1006
+ * \retval i converted to half.
1007
1007
  * \internal
1008
1008
  * \exception-guarantee no-throw guarantee
1009
1009
  * \behavior reentrant, thread safe
@@ -1015,13 +1015,13 @@ __CUDA_FP16_DECL__ __half __ull2half_ru(const unsigned long long int i);
1015
1015
  * \ingroup CUDA_MATH__HALF_MISC
1016
1016
  * \brief Convert a half to a signed 64-bit integer in round-to-nearest-even
1017
1017
  * mode.
1018
- *
1018
+ *
1019
1019
  * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1020
1020
  * integer in round-to-nearest-even mode.
1021
- * \param[in] h - half. Is only being read.
1022
- *
1021
+ * \param[in] h - half. Is only being read.
1022
+ *
1023
1023
  * \returns long long int
1024
- * \retval h converted to a signed 64-bit integer.
1024
+ * \retval h converted to a signed 64-bit integer.
1025
1025
  * \internal
1026
1026
  * \exception-guarantee no-throw guarantee
1027
1027
  * \behavior reentrant, thread safe
@@ -1031,13 +1031,13 @@ __CUDA_FP16_DECL__ long long int __half2ll_rn(const __half h);
1031
1031
  /**
1032
1032
  * \ingroup CUDA_MATH__HALF_MISC
1033
1033
  * \brief Convert a half to a signed 64-bit integer in round-towards-zero mode.
1034
- *
1034
+ *
1035
1035
  * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1036
1036
  * integer in round-towards-zero mode.
1037
- * \param[in] h - half. Is only being read.
1038
- *
1037
+ * \param[in] h - half. Is only being read.
1038
+ *
1039
1039
  * \returns long long int
1040
- * \retval h converted to a signed 64-bit integer.
1040
+ * \retval h converted to a signed 64-bit integer.
1041
1041
  * \internal
1042
1042
  * \exception-guarantee no-throw guarantee
1043
1043
  * \behavior reentrant, thread safe
@@ -1047,13 +1047,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ long long int __half2ll_rz(const __half h);
1047
1047
  /**
1048
1048
  * \ingroup CUDA_MATH__HALF_MISC
1049
1049
  * \brief Convert a half to a signed 64-bit integer in round-down mode.
1050
- *
1050
+ *
1051
1051
  * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1052
1052
  * integer in round-down mode.
1053
- * \param[in] h - half. Is only being read.
1054
- *
1053
+ * \param[in] h - half. Is only being read.
1054
+ *
1055
1055
  * \returns long long int
1056
- * \retval h converted to a signed 64-bit integer.
1056
+ * \retval h converted to a signed 64-bit integer.
1057
1057
  * \internal
1058
1058
  * \exception-guarantee no-throw guarantee
1059
1059
  * \behavior reentrant, thread safe
@@ -1063,13 +1063,13 @@ __CUDA_FP16_DECL__ long long int __half2ll_rd(const __half h);
1063
1063
  /**
1064
1064
  * \ingroup CUDA_MATH__HALF_MISC
1065
1065
  * \brief Convert a half to a signed 64-bit integer in round-up mode.
1066
- *
1066
+ *
1067
1067
  * \details Convert the half-precision floating-point value \p h to a signed 64-bit
1068
1068
  * integer in round-up mode.
1069
- * \param[in] h - half. Is only being read.
1070
- *
1069
+ * \param[in] h - half. Is only being read.
1070
+ *
1071
1071
  * \returns long long int
1072
- * \retval h converted to a signed 64-bit integer.
1072
+ * \retval h converted to a signed 64-bit integer.
1073
1073
  * \internal
1074
1074
  * \exception-guarantee no-throw guarantee
1075
1075
  * \behavior reentrant, thread safe
@@ -1081,13 +1081,13 @@ __CUDA_FP16_DECL__ long long int __half2ll_ru(const __half h);
1081
1081
  * \ingroup CUDA_MATH__HALF_MISC
1082
1082
  * \brief Convert a signed 64-bit integer to a half in round-to-nearest-even
1083
1083
  * mode.
1084
- *
1084
+ *
1085
1085
  * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1086
1086
  * value in round-to-nearest-even mode.
1087
- * \param[in] i - long long int. Is only being read.
1088
- *
1087
+ * \param[in] i - long long int. Is only being read.
1088
+ *
1089
1089
  * \returns half
1090
- * \retval i converted to half.
1090
+ * \retval i converted to half.
1091
1091
  * \internal
1092
1092
  * \exception-guarantee no-throw guarantee
1093
1093
  * \behavior reentrant, thread safe
@@ -1097,25 +1097,25 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __ll2half_rn(const long long int i);
1097
1097
  /**
1098
1098
  * \ingroup CUDA_MATH__HALF_MISC
1099
1099
  * \brief Convert a signed 64-bit integer to a half in round-towards-zero mode.
1100
- *
1100
+ *
1101
1101
  * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1102
1102
  * value in round-towards-zero mode.
1103
- * \param[in] i - long long int. Is only being read.
1104
- *
1103
+ * \param[in] i - long long int. Is only being read.
1104
+ *
1105
1105
  * \returns half
1106
- * \retval i converted to half.
1106
+ * \retval i converted to half.
1107
1107
  */
1108
1108
  __CUDA_FP16_DECL__ __half __ll2half_rz(const long long int i);
1109
1109
  /**
1110
1110
  * \ingroup CUDA_MATH__HALF_MISC
1111
1111
  * \brief Convert a signed 64-bit integer to a half in round-down mode.
1112
- *
1112
+ *
1113
1113
  * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1114
1114
  * value in round-down mode.
1115
- * \param[in] i - long long int. Is only being read.
1116
- *
1115
+ * \param[in] i - long long int. Is only being read.
1116
+ *
1117
1117
  * \returns half
1118
- * \retval i converted to half.
1118
+ * \retval i converted to half.
1119
1119
  * \internal
1120
1120
  * \exception-guarantee no-throw guarantee
1121
1121
  * \behavior reentrant, thread safe
@@ -1125,13 +1125,13 @@ __CUDA_FP16_DECL__ __half __ll2half_rd(const long long int i);
1125
1125
  /**
1126
1126
  * \ingroup CUDA_MATH__HALF_MISC
1127
1127
  * \brief Convert a signed 64-bit integer to a half in round-up mode.
1128
- *
1128
+ *
1129
1129
  * \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
1130
1130
  * value in round-up mode.
1131
- * \param[in] i - long long int. Is only being read.
1132
- *
1131
+ * \param[in] i - long long int. Is only being read.
1132
+ *
1133
1133
  * \returns half
1134
- * \retval i converted to half.
1134
+ * \retval i converted to half.
1135
1135
  * \internal
1136
1136
  * \exception-guarantee no-throw guarantee
1137
1137
  * \behavior reentrant, thread safe
@@ -1142,13 +1142,13 @@ __CUDA_FP16_DECL__ __half __ll2half_ru(const long long int i);
1142
1142
  /**
1143
1143
  * \ingroup CUDA_MATH__HALF_FUNCTIONS
1144
1144
  * \brief Truncate input argument to the integral part.
1145
- *
1145
+ *
1146
1146
  * \details Round \p h to the nearest integer value that does not exceed \p h in
1147
1147
  * magnitude.
1148
- * \param[in] h - half. Is only being read.
1149
- *
1148
+ * \param[in] h - half. Is only being read.
1149
+ *
1150
1150
  * \returns half
1151
- * \retval The truncated integer value.
1151
+ * \retval The truncated integer value.
1152
1152
  * \internal
1153
1153
  * \exception-guarantee no-throw guarantee
1154
1154
  * \behavior reentrant, thread safe
@@ -1158,12 +1158,12 @@ __CUDA_FP16_DECL__ __half htrunc(const __half h);
1158
1158
  /**
1159
1159
  * \ingroup CUDA_MATH__HALF_FUNCTIONS
1160
1160
  * \brief Calculate ceiling of the input argument.
1161
- *
1161
+ *
1162
1162
  * \details Compute the smallest integer value not less than \p h.
1163
- * \param[in] h - half. Is only being read.
1164
- *
1163
+ * \param[in] h - half. Is only being read.
1164
+ *
1165
1165
  * \returns half
1166
- * \retval The smallest integer value not less than \p h.
1166
+ * \retval The smallest integer value not less than \p h.
1167
1167
  * \internal
1168
1168
  * \exception-guarantee no-throw guarantee
1169
1169
  * \behavior reentrant, thread safe
@@ -1173,12 +1173,12 @@ __CUDA_FP16_DECL__ __half hceil(const __half h);
1173
1173
  /**
1174
1174
  * \ingroup CUDA_MATH__HALF_FUNCTIONS
1175
1175
  * \brief Calculate the largest integer less than or equal to \p h.
1176
- *
1176
+ *
1177
1177
  * \details Calculate the largest integer value which is less than or equal to \p h.
1178
- * \param[in] h - half. Is only being read.
1179
- *
1178
+ * \param[in] h - half. Is only being read.
1179
+ *
1180
1180
  * \returns half
1181
- * \retval The largest integer value which is less than or equal to \p h.
1181
+ * \retval The largest integer value which is less than or equal to \p h.
1182
1182
  * \internal
1183
1183
  * \exception-guarantee no-throw guarantee
1184
1184
  * \behavior reentrant, thread safe
@@ -1189,13 +1189,13 @@ __CUDA_FP16_DECL__ __half hfloor(const __half h);
1189
1189
  * \ingroup CUDA_MATH__HALF_FUNCTIONS
1190
1190
  * \brief Round input to nearest integer value in half-precision floating-point
1191
1191
  * number.
1192
- *
1192
+ *
1193
1193
  * \details Round \p h to the nearest integer value in half-precision floating-point
1194
1194
  * format, with halfway cases rounded to the nearest even integer value.
1195
- * \param[in] h - half. Is only being read.
1196
- *
1195
+ * \param[in] h - half. Is only being read.
1196
+ *
1197
1197
  * \returns half
1198
- * \retval The nearest integer to \p h.
1198
+ * \retval The nearest integer to \p h.
1199
1199
  * \internal
1200
1200
  * \exception-guarantee no-throw guarantee
1201
1201
  * \behavior reentrant, thread safe
@@ -1206,13 +1206,13 @@ __CUDA_FP16_DECL__ __half hrint(const __half h);
1206
1206
  /**
1207
1207
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1208
1208
  * \brief Truncate \p half2 vector input argument to the integral part.
1209
- *
1209
+ *
1210
1210
  * \details Round each component of vector \p h to the nearest integer value that does
1211
1211
  * not exceed \p h in magnitude.
1212
- * \param[in] h - half2. Is only being read.
1213
- *
1212
+ * \param[in] h - half2. Is only being read.
1213
+ *
1214
1214
  * \returns half2
1215
- * \retval The truncated \p h.
1215
+ * \retval The truncated \p h.
1216
1216
  * \internal
1217
1217
  * \exception-guarantee no-throw guarantee
1218
1218
  * \behavior reentrant, thread safe
@@ -1222,13 +1222,13 @@ __CUDA_FP16_DECL__ __half2 h2trunc(const __half2 h);
1222
1222
  /**
1223
1223
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1224
1224
  * \brief Calculate \p half2 vector ceiling of the input argument.
1225
- *
1225
+ *
1226
1226
  * \details For each component of vector \p h compute the smallest integer value not less
1227
1227
  * than \p h.
1228
- * \param[in] h - half2. Is only being read.
1229
- *
1228
+ * \param[in] h - half2. Is only being read.
1229
+ *
1230
1230
  * \returns half2
1231
- * \retval The vector of smallest integers not less than \p h.
1231
+ * \retval The vector of smallest integers not less than \p h.
1232
1232
  * \internal
1233
1233
  * \exception-guarantee no-throw guarantee
1234
1234
  * \behavior reentrant, thread safe
@@ -1238,13 +1238,13 @@ __CUDA_FP16_DECL__ __half2 h2ceil(const __half2 h);
1238
1238
  /**
1239
1239
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1240
1240
  * \brief Calculate the largest integer less than or equal to \p h.
1241
- *
1241
+ *
1242
1242
  * \details For each component of vector \p h calculate the largest integer value which
1243
1243
  * is less than or equal to \p h.
1244
- * \param[in] h - half2. Is only being read.
1245
- *
1244
+ * \param[in] h - half2. Is only being read.
1245
+ *
1246
1246
  * \returns half2
1247
- * \retval The vector of largest integers which is less than or equal to \p h.
1247
+ * \retval The vector of largest integers which is less than or equal to \p h.
1248
1248
  * \internal
1249
1249
  * \exception-guarantee no-throw guarantee
1250
1250
  * \behavior reentrant, thread safe
@@ -1255,14 +1255,14 @@ __CUDA_FP16_DECL__ __half2 h2floor(const __half2 h);
1255
1255
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
1256
1256
  * \brief Round input to nearest integer value in half-precision floating-point
1257
1257
  * number.
1258
- *
1258
+ *
1259
1259
  * \details Round each component of \p half2 vector \p h to the nearest integer value in
1260
1260
  * half-precision floating-point format, with halfway cases rounded to the
1261
1261
  * nearest even integer value.
1262
- * \param[in] h - half2. Is only being read.
1263
- *
1262
+ * \param[in] h - half2. Is only being read.
1263
+ *
1264
1264
  * \returns half2
1265
- * \retval The vector of rounded integer values.
1265
+ * \retval The vector of rounded integer values.
1266
1266
  * \internal
1267
1267
  * \exception-guarantee no-throw guarantee
1268
1268
  * \behavior reentrant, thread safe
@@ -1273,13 +1273,13 @@ __CUDA_FP16_DECL__ __half2 h2rint(const __half2 h);
1273
1273
  /**
1274
1274
  * \ingroup CUDA_MATH__HALF_MISC
1275
1275
  * \brief Returns \p half2 with both halves equal to the input value.
1276
- *
1276
+ *
1277
1277
  * \details Returns \p half2 number with both halves equal to the input \p a \p half
1278
1278
  * number.
1279
- * \param[in] a - half. Is only being read.
1280
- *
1279
+ * \param[in] a - half. Is only being read.
1280
+ *
1281
1281
  * \returns half2
1282
- * \retval The vector which has both its halves equal to the input \p a.
1282
+ * \retval The vector which has both its halves equal to the input \p a.
1283
1283
  * \internal
1284
1284
  * \exception-guarantee no-throw guarantee
1285
1285
  * \behavior reentrant, thread safe
@@ -1289,13 +1289,13 @@ __CUDA_FP16_DECL__ __half2 __half2half2(const __half a);
1289
1289
  /**
1290
1290
  * \ingroup CUDA_MATH__HALF_MISC
1291
1291
  * \brief Swaps both halves of the \p half2 input.
1292
- *
1292
+ *
1293
1293
  * \details Swaps both halves of the \p half2 input and returns a new \p half2 number
1294
1294
  * with swapped halves.
1295
- * \param[in] a - half2. Is only being read.
1296
- *
1295
+ * \param[in] a - half2. Is only being read.
1296
+ *
1297
1297
  * \returns half2
1298
- * \retval a with its halves being swapped.
1298
+ * \retval a with its halves being swapped.
1299
1299
  * \internal
1300
1300
  * \exception-guarantee no-throw guarantee
1301
1301
  * \behavior reentrant, thread safe
@@ -1305,17 +1305,17 @@ __CUDA_FP16_DECL__ __half2 __lowhigh2highlow(const __half2 a);
1305
1305
  /**
1306
1306
  * \ingroup CUDA_MATH__HALF_MISC
1307
1307
  * \brief Extracts low 16 bits from each of the two \p half2 inputs and combines
1308
- * into one \p half2 number.
1309
- *
1308
+ * into one \p half2 number.
1309
+ *
1310
1310
  * \details Extracts low 16 bits from each of the two \p half2 inputs and combines into
1311
1311
  * one \p half2 number. Low 16 bits from input \p a is stored in low 16 bits of
1312
1312
  * the return value, low 16 bits from input \p b is stored in high 16 bits of
1313
- * the return value.
1314
- * \param[in] a - half2. Is only being read.
1315
- * \param[in] b - half2. Is only being read.
1316
- *
1313
+ * the return value.
1314
+ * \param[in] a - half2. Is only being read.
1315
+ * \param[in] b - half2. Is only being read.
1316
+ *
1317
1317
  * \returns half2
1318
- * \retval The low 16 bits of \p a and of \p b.
1318
+ * \retval The low 16 bits of \p a and of \p b.
1319
1319
  * \internal
1320
1320
  * \exception-guarantee no-throw guarantee
1321
1321
  * \behavior reentrant, thread safe
@@ -1326,16 +1326,16 @@ __CUDA_FP16_DECL__ __half2 __lows2half2(const __half2 a, const __half2 b);
1326
1326
  * \ingroup CUDA_MATH__HALF_MISC
1327
1327
  * \brief Extracts high 16 bits from each of the two \p half2 inputs and
1328
1328
  * combines into one \p half2 number.
1329
- *
1329
+ *
1330
1330
  * \details Extracts high 16 bits from each of the two \p half2 inputs and combines into
1331
1331
  * one \p half2 number. High 16 bits from input \p a is stored in low 16 bits of
1332
1332
  * the return value, high 16 bits from input \p b is stored in high 16 bits of
1333
1333
  * the return value.
1334
- * \param[in] a - half2. Is only being read.
1335
- * \param[in] b - half2. Is only being read.
1336
- *
1334
+ * \param[in] a - half2. Is only being read.
1335
+ * \param[in] b - half2. Is only being read.
1336
+ *
1337
1337
  * \returns half2
1338
- * \retval The high 16 bits of \p a and of \p b.
1338
+ * \retval The high 16 bits of \p a and of \p b.
1339
1339
  * \internal
1340
1340
  * \exception-guarantee no-throw guarantee
1341
1341
  * \behavior reentrant, thread safe
@@ -1347,10 +1347,10 @@ __CUDA_FP16_DECL__ __half2 __highs2half2(const __half2 a, const __half2 b);
1347
1347
  * \brief Returns high 16 bits of \p half2 input.
1348
1348
  *
1349
1349
  * \details Returns high 16 bits of \p half2 input \p a.
1350
- * \param[in] a - half2. Is only being read.
1350
+ * \param[in] a - half2. Is only being read.
1351
1351
  *
1352
1352
  * \returns half
1353
- * \retval The high 16 bits of the input.
1353
+ * \retval The high 16 bits of the input.
1354
1354
  * \internal
1355
1355
  * \exception-guarantee no-throw guarantee
1356
1356
  * \behavior reentrant, thread safe
@@ -1362,10 +1362,10 @@ __CUDA_FP16_DECL__ __half __high2half(const __half2 a);
1362
1362
  * \brief Returns low 16 bits of \p half2 input.
1363
1363
  *
1364
1364
  * \details Returns low 16 bits of \p half2 input \p a.
1365
- * \param[in] a - half2. Is only being read.
1365
+ * \param[in] a - half2. Is only being read.
1366
1366
  *
1367
1367
  * \returns half
1368
- * \retval Returns \p half which contains low 16 bits of the input \p a.
1368
+ * \retval Returns \p half which contains low 16 bits of the input \p a.
1369
1369
  * \internal
1370
1370
  * \exception-guarantee no-throw guarantee
1371
1371
  * \behavior reentrant, thread safe
@@ -1375,14 +1375,14 @@ __CUDA_FP16_DECL__ __half __low2half(const __half2 a);
1375
1375
  /**
1376
1376
  * \ingroup CUDA_MATH__HALF_COMPARISON
1377
1377
  * \brief Checks if the input \p half number is infinite.
1378
- *
1379
- * \details Checks if the input \p half number \p a is infinite.
1380
- * \param[in] a - half. Is only being read.
1381
- *
1382
- * \returns int
1383
- * \retval -1 iff \p a is equal to negative infinity,
1384
- * \retval 1 iff \p a is equal to positive infinity,
1385
- * \retval 0 otherwise.
1378
+ *
1379
+ * \details Checks if the input \p half number \p a is infinite.
1380
+ * \param[in] a - half. Is only being read.
1381
+ *
1382
+ * \returns int
1383
+ * \retval -1 iff \p a is equal to negative infinity,
1384
+ * \retval 1 iff \p a is equal to positive infinity,
1385
+ * \retval 0 otherwise.
1386
1386
  * \internal
1387
1387
  * \exception-guarantee no-throw guarantee
1388
1388
  * \behavior reentrant, thread safe
@@ -1392,15 +1392,15 @@ __CUDA_FP16_DECL__ int __hisinf(const __half a);
1392
1392
  /**
1393
1393
  * \ingroup CUDA_MATH__HALF_MISC
1394
1394
  * \brief Combines two \p half numbers into one \p half2 number.
1395
- *
1395
+ *
1396
1396
  * \details Combines two input \p half number \p a and \p b into one \p half2 number.
1397
1397
  * Input \p a is stored in low 16 bits of the return value, input \p b is stored
1398
1398
  * in high 16 bits of the return value.
1399
- * \param[in] a - half. Is only being read.
1400
- * \param[in] b - half. Is only being read.
1401
- *
1399
+ * \param[in] a - half. Is only being read.
1400
+ * \param[in] b - half. Is only being read.
1401
+ *
1402
1402
  * \returns half2
1403
- * \retval The half2 with one half equal to \p a and the other to \p b.
1403
+ * \retval The half2 with one half equal to \p a and the other to \p b.
1404
1404
  * \internal
1405
1405
  * \exception-guarantee no-throw guarantee
1406
1406
  * \behavior reentrant, thread safe
@@ -1410,13 +1410,13 @@ __CUDA_FP16_DECL__ __half2 __halves2half2(const __half a, const __half b);
1410
1410
  /**
1411
1411
  * \ingroup CUDA_MATH__HALF_MISC
1412
1412
  * \brief Extracts low 16 bits from \p half2 input.
1413
- *
1413
+ *
1414
1414
  * \details Extracts low 16 bits from \p half2 input \p a and returns a new \p half2
1415
1415
  * number which has both halves equal to the extracted bits.
1416
- * \param[in] a - half2. Is only being read.
1417
- *
1416
+ * \param[in] a - half2. Is only being read.
1417
+ *
1418
1418
  * \returns half2
1419
- * \retval The half2 with both halves equal to the low 16 bits of the input.
1419
+ * \retval The half2 with both halves equal to the low 16 bits of the input.
1420
1420
  * \internal
1421
1421
  * \exception-guarantee no-throw guarantee
1422
1422
  * \behavior reentrant, thread safe
@@ -1426,13 +1426,13 @@ __CUDA_FP16_DECL__ __half2 __low2half2(const __half2 a);
1426
1426
  /**
1427
1427
  * \ingroup CUDA_MATH__HALF_MISC
1428
1428
  * \brief Extracts high 16 bits from \p half2 input.
1429
- *
1429
+ *
1430
1430
  * \details Extracts high 16 bits from \p half2 input \p a and returns a new \p half2
1431
1431
  * number which has both halves equal to the extracted bits.
1432
- * \param[in] a - half2. Is only being read.
1433
- *
1432
+ * \param[in] a - half2. Is only being read.
1433
+ *
1434
1434
  * \returns half2
1435
- * \retval The half2 with both halves equal to the high 16 bits of the input.
1435
+ * \retval The half2 with both halves equal to the high 16 bits of the input.
1436
1436
  * \internal
1437
1437
  * \exception-guarantee no-throw guarantee
1438
1438
  * \behavior reentrant, thread safe
@@ -1443,13 +1443,13 @@ __CUDA_FP16_DECL__ __half2 __high2half2(const __half2 a);
1443
1443
  /**
1444
1444
  * \ingroup CUDA_MATH__HALF_MISC
1445
1445
  * \brief Reinterprets bits in a \p half as a signed short integer.
1446
- *
1446
+ *
1447
1447
  * \details Reinterprets the bits in the half-precision floating-point number \p h
1448
- * as a signed short integer.
1449
- * \param[in] h - half. Is only being read.
1450
- *
1448
+ * as a signed short integer.
1449
+ * \param[in] h - half. Is only being read.
1450
+ *
1451
1451
  * \returns short int
1452
- * \retval The reinterpreted value.
1452
+ * \retval The reinterpreted value.
1453
1453
  * \internal
1454
1454
  * \exception-guarantee no-throw guarantee
1455
1455
  * \behavior reentrant, thread safe
@@ -1459,11 +1459,11 @@ __CUDA_FP16_DECL__ short int __half_as_short(const __half h);
1459
1459
  /**
1460
1460
  * \ingroup CUDA_MATH__HALF_MISC
1461
1461
  * \brief Reinterprets bits in a \p half as an unsigned short integer.
1462
- *
1462
+ *
1463
1463
  * \details Reinterprets the bits in the half-precision floating-point \p h
1464
1464
  * as an unsigned short number.
1465
- * \param[in] h - half. Is only being read.
1466
- *
1465
+ * \param[in] h - half. Is only being read.
1466
+ *
1467
1467
  * \returns unsigned short int
1468
1468
  * \retval The reinterpreted value.
1469
1469
  * \internal
@@ -1475,11 +1475,11 @@ __CUDA_FP16_DECL__ unsigned short int __half_as_ushort(const __half h);
1475
1475
  /**
1476
1476
  * \ingroup CUDA_MATH__HALF_MISC
1477
1477
  * \brief Reinterprets bits in a signed short integer as a \p half.
1478
- *
1478
+ *
1479
1479
  * \details Reinterprets the bits in the signed short integer \p i as a
1480
1480
  * half-precision floating-point number.
1481
- * \param[in] i - short int. Is only being read.
1482
- *
1481
+ * \param[in] i - short int. Is only being read.
1482
+ *
1483
1483
  * \returns half
1484
1484
  * \retval The reinterpreted value.
1485
1485
  * \internal
@@ -1491,11 +1491,11 @@ __CUDA_FP16_DECL__ __half __short_as_half(const short int i);
1491
1491
  /**
1492
1492
  * \ingroup CUDA_MATH__HALF_MISC
1493
1493
  * \brief Reinterprets bits in an unsigned short integer as a \p half.
1494
- *
1494
+ *
1495
1495
  * \details Reinterprets the bits in the unsigned short integer \p i as a
1496
1496
  * half-precision floating-point number.
1497
- * \param[in] i - unsigned short int. Is only being read.
1498
- *
1497
+ * \param[in] i - unsigned short int. Is only being read.
1498
+ *
1499
1499
  * \returns half
1500
1500
  * \retval The reinterpreted value.
1501
1501
  * \internal
@@ -1534,22 +1534,22 @@ __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_xor)) __half
1534
1534
 
1535
1535
  /**
1536
1536
  * \ingroup CUDA_MATH__HALF_MISC
1537
- * \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
1538
- *
1539
- * \details Returns the value of var held by the thread whose ID is given by delta.
1540
- * If width is less than warpSize then each subsection of the warp behaves as a separate
1541
- * entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
1542
- * the value returned corresponds to the value of var held by the delta modulo width (i.e.
1543
- * within the same subsection). width must have a value which is a power of 2;
1544
- * results are undefined if width is not a power of 2, or is a number greater than
1545
- * warpSize.
1546
- * \param[in] mask - unsigned int. Is only being read.
1547
- * \param[in] var - half2. Is only being read.
1548
- * \param[in] delta - int. Is only being read.
1549
- * \param[in] width - int. Is only being read.
1550
- *
1551
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1552
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1537
+ * \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
1538
+ *
1539
+ * \details Returns the value of var held by the thread whose ID is given by delta.
1540
+ * If width is less than warpSize then each subsection of the warp behaves as a separate
1541
+ * entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
1542
+ * the value returned corresponds to the value of var held by the delta modulo width (i.e.
1543
+ * within the same subsection). width must have a value which is a power of 2;
1544
+ * results are undefined if width is not a power of 2, or is a number greater than
1545
+ * warpSize.
1546
+ * \param[in] mask - unsigned int. Is only being read.
1547
+ * \param[in] var - half2. Is only being read.
1548
+ * \param[in] delta - int. Is only being read.
1549
+ * \param[in] width - int. Is only being read.
1550
+ *
1551
+ * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1552
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1553
1553
  * \internal
1554
1554
  * \exception-guarantee no-throw guarantee
1555
1555
  * \behavior not reentrant, not thread safe
@@ -1558,22 +1558,22 @@ __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_xor)) __half
1558
1558
  __CUDA_FP16_DECL__ __half2 __shfl_sync(const unsigned mask, const __half2 var, const int delta, const int width = warpSize);
1559
1559
  /**
1560
1560
  * \ingroup CUDA_MATH__HALF_MISC
1561
- * \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
1562
- *
1563
- * \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
1564
- * The value of var held by the resulting lane ID is returned: in effect, var is shifted up
1565
- * the warp by delta threads. If width is less than warpSize then each subsection of the warp
1566
- * behaves as a separate entity with a starting logical thread ID of 0. The source thread index
1567
- * will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
1568
- * width must have a value which is a power of 2; results are undefined if width is not a power of 2,
1569
- * or is a number greater than warpSize.
1570
- * \param[in] mask - unsigned int. Is only being read.
1571
- * \param[in] var - half2. Is only being read.
1572
- * \param[in] delta - int. Is only being read.
1573
- * \param[in] width - int. Is only being read.
1574
- *
1575
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1576
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1561
+ * \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
1562
+ *
1563
+ * \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
1564
+ * The value of var held by the resulting lane ID is returned: in effect, var is shifted up
1565
+ * the warp by delta threads. If width is less than warpSize then each subsection of the warp
1566
+ * behaves as a separate entity with a starting logical thread ID of 0. The source thread index
1567
+ * will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
1568
+ * width must have a value which is a power of 2; results are undefined if width is not a power of 2,
1569
+ * or is a number greater than warpSize.
1570
+ * \param[in] mask - unsigned int. Is only being read.
1571
+ * \param[in] var - half2. Is only being read.
1572
+ * \param[in] delta - int. Is only being read.
1573
+ * \param[in] width - int. Is only being read.
1574
+ *
1575
+ * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1576
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1577
1577
  * \internal
1578
1578
  * \exception-guarantee no-throw guarantee
1579
1579
  * \behavior not reentrant, not thread safe
@@ -1582,22 +1582,22 @@ __CUDA_FP16_DECL__ __half2 __shfl_sync(const unsigned mask, const __half2 var, c
1582
1582
  __CUDA_FP16_DECL__ __half2 __shfl_up_sync(const unsigned mask, const __half2 var, const unsigned int delta, const int width = warpSize);
1583
1583
  /**
1584
1584
  * \ingroup CUDA_MATH__HALF_MISC
1585
- * \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
1586
- *
1587
- * \details Calculates a source thread ID by adding delta to the caller's thread ID.
1588
- * The value of var held by the resulting thread ID is returned: this has the effect
1589
- * of shifting var down the warp by delta threads. If width is less than warpSize then
1590
- * each subsection of the warp behaves as a separate entity with a starting logical
1591
- * thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
1592
- * will not wrap around the value of width and so the upper delta threads
1593
- * will remain unchanged.
1594
- * \param[in] mask - unsigned int. Is only being read.
1595
- * \param[in] var - half2. Is only being read.
1596
- * \param[in] delta - int. Is only being read.
1597
- * \param[in] width - int. Is only being read.
1598
- *
1599
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1600
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1585
+ * \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
1586
+ *
1587
+ * \details Calculates a source thread ID by adding delta to the caller's thread ID.
1588
+ * The value of var held by the resulting thread ID is returned: this has the effect
1589
+ * of shifting var down the warp by delta threads. If width is less than warpSize then
1590
+ * each subsection of the warp behaves as a separate entity with a starting logical
1591
+ * thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
1592
+ * will not wrap around the value of width and so the upper delta threads
1593
+ * will remain unchanged.
1594
+ * \param[in] mask - unsigned int. Is only being read.
1595
+ * \param[in] var - half2. Is only being read.
1596
+ * \param[in] delta - int. Is only being read.
1597
+ * \param[in] width - int. Is only being read.
1598
+ *
1599
+ * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1600
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1601
1601
  * \internal
1602
1602
  * \exception-guarantee no-throw guarantee
1603
1603
  * \behavior not reentrant, not thread safe
@@ -1606,21 +1606,21 @@ __CUDA_FP16_DECL__ __half2 __shfl_up_sync(const unsigned mask, const __half2 var
1606
1606
  __CUDA_FP16_DECL__ __half2 __shfl_down_sync(const unsigned mask, const __half2 var, const unsigned int delta, const int width = warpSize);
1607
1607
  /**
1608
1608
  * \ingroup CUDA_MATH__HALF_MISC
1609
- * \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
1610
- *
1611
- * \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with mask:
1612
- * the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
1613
- * group of width consecutive threads are able to access elements from earlier groups of threads,
1614
- * however if they attempt to access elements from later groups of threads their own value of var
1615
- * will be returned. This mode implements a butterfly addressing pattern such as is used in tree
1616
- * reduction and broadcast.
1617
- * \param[in] mask - unsigned int. Is only being read.
1618
- * \param[in] var - half2. Is only being read.
1619
- * \param[in] delta - int. Is only being read.
1620
- * \param[in] width - int. Is only being read.
1621
- *
1622
- * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1623
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1609
+ * \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
1610
+ *
1611
+ * \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with mask:
1612
+ * the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
1613
+ * group of width consecutive threads are able to access elements from earlier groups of threads,
1614
+ * however if they attempt to access elements from later groups of threads their own value of var
1615
+ * will be returned. This mode implements a butterfly addressing pattern such as is used in tree
1616
+ * reduction and broadcast.
1617
+ * \param[in] mask - unsigned int. Is only being read.
1618
+ * \param[in] var - half2. Is only being read.
1619
+ * \param[in] delta - int. Is only being read.
1620
+ * \param[in] width - int. Is only being read.
1621
+ *
1622
+ * \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
1623
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1624
1624
  * \internal
1625
1625
  * \exception-guarantee no-throw guarantee
1626
1626
  * \behavior not reentrant, not thread safe
@@ -1629,22 +1629,22 @@ __CUDA_FP16_DECL__ __half2 __shfl_down_sync(const unsigned mask, const __half2 v
1629
1629
  __CUDA_FP16_DECL__ __half2 __shfl_xor_sync(const unsigned mask, const __half2 var, const int delta, const int width = warpSize);
1630
1630
  /**
1631
1631
  * \ingroup CUDA_MATH__HALF_MISC
1632
- * \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
1633
- *
1634
- * \details Returns the value of var held by the thread whose ID is given by delta.
1635
- * If width is less than warpSize then each subsection of the warp behaves as a separate
1636
- * entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
1637
- * the value returned corresponds to the value of var held by the delta modulo width (i.e.
1638
- * within the same subsection). width must have a value which is a power of 2;
1639
- * results are undefined if width is not a power of 2, or is a number greater than
1640
- * warpSize.
1641
- * \param[in] mask - unsigned int. Is only being read.
1642
- * \param[in] var - half. Is only being read.
1643
- * \param[in] delta - int. Is only being read.
1644
- * \param[in] width - int. Is only being read.
1645
- *
1646
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1647
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1632
+ * \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
1633
+ *
1634
+ * \details Returns the value of var held by the thread whose ID is given by delta.
1635
+ * If width is less than warpSize then each subsection of the warp behaves as a separate
1636
+ * entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
1637
+ * the value returned corresponds to the value of var held by the delta modulo width (i.e.
1638
+ * within the same subsection). width must have a value which is a power of 2;
1639
+ * results are undefined if width is not a power of 2, or is a number greater than
1640
+ * warpSize.
1641
+ * \param[in] mask - unsigned int. Is only being read.
1642
+ * \param[in] var - half. Is only being read.
1643
+ * \param[in] delta - int. Is only being read.
1644
+ * \param[in] width - int. Is only being read.
1645
+ *
1646
+ * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1647
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1648
1648
  * \internal
1649
1649
  * \exception-guarantee no-throw guarantee
1650
1650
  * \behavior not reentrant, not thread safe
@@ -1653,21 +1653,21 @@ __CUDA_FP16_DECL__ __half2 __shfl_xor_sync(const unsigned mask, const __half2 va
1653
1653
  __CUDA_FP16_DECL__ __half __shfl_sync(const unsigned mask, const __half var, const int delta, const int width = warpSize);
1654
1654
  /**
1655
1655
  * \ingroup CUDA_MATH__HALF_MISC
1656
- * \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
1657
- * \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
1658
- * The value of var held by the resulting lane ID is returned: in effect, var is shifted up
1659
- * the warp by delta threads. If width is less than warpSize then each subsection of the warp
1660
- * behaves as a separate entity with a starting logical thread ID of 0. The source thread index
1661
- * will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
1662
- * width must have a value which is a power of 2; results are undefined if width is not a power of 2,
1663
- * or is a number greater than warpSize.
1664
- * \param[in] mask - unsigned int. Is only being read.
1665
- * \param[in] var - half. Is only being read.
1666
- * \param[in] delta - int. Is only being read.
1667
- * \param[in] width - int. Is only being read.
1668
- *
1669
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1670
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1656
+ * \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
1657
+ * \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
1658
+ * The value of var held by the resulting lane ID is returned: in effect, var is shifted up
1659
+ * the warp by delta threads. If width is less than warpSize then each subsection of the warp
1660
+ * behaves as a separate entity with a starting logical thread ID of 0. The source thread index
1661
+ * will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
1662
+ * width must have a value which is a power of 2; results are undefined if width is not a power of 2,
1663
+ * or is a number greater than warpSize.
1664
+ * \param[in] mask - unsigned int. Is only being read.
1665
+ * \param[in] var - half. Is only being read.
1666
+ * \param[in] delta - int. Is only being read.
1667
+ * \param[in] width - int. Is only being read.
1668
+ *
1669
+ * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1670
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1671
1671
  * \internal
1672
1672
  * \exception-guarantee no-throw guarantee
1673
1673
  * \behavior not reentrant, not thread safe
@@ -1676,22 +1676,22 @@ __CUDA_FP16_DECL__ __half __shfl_sync(const unsigned mask, const __half var, con
1676
1676
  __CUDA_FP16_DECL__ __half __shfl_up_sync(const unsigned mask, const __half var, const unsigned int delta, const int width = warpSize);
1677
1677
  /**
1678
1678
  * \ingroup CUDA_MATH__HALF_MISC
1679
- * \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
1680
- *
1681
- * \details Calculates a source thread ID by adding delta to the caller's thread ID.
1682
- * The value of var held by the resulting thread ID is returned: this has the effect
1683
- * of shifting var down the warp by delta threads. If width is less than warpSize then
1684
- * each subsection of the warp behaves as a separate entity with a starting logical
1685
- * thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
1686
- * will not wrap around the value of width and so the upper delta threads
1687
- * will remain unchanged.
1688
- * \param[in] mask - unsigned int. Is only being read.
1689
- * \param[in] var - half. Is only being read.
1690
- * \param[in] delta - int. Is only being read.
1691
- * \param[in] width - int. Is only being read.
1692
- *
1693
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1694
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1679
+ * \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
1680
+ *
1681
+ * \details Calculates a source thread ID by adding delta to the caller's thread ID.
1682
+ * The value of var held by the resulting thread ID is returned: this has the effect
1683
+ * of shifting var down the warp by delta threads. If width is less than warpSize then
1684
+ * each subsection of the warp behaves as a separate entity with a starting logical
1685
+ * thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
1686
+ * will not wrap around the value of width and so the upper delta threads
1687
+ * will remain unchanged.
1688
+ * \param[in] mask - unsigned int. Is only being read.
1689
+ * \param[in] var - half. Is only being read.
1690
+ * \param[in] delta - int. Is only being read.
1691
+ * \param[in] width - int. Is only being read.
1692
+ *
1693
+ * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1694
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1695
1695
  * \internal
1696
1696
  * \exception-guarantee no-throw guarantee
1697
1697
  * \behavior not reentrant, not thread safe
@@ -1700,21 +1700,21 @@ __CUDA_FP16_DECL__ __half __shfl_up_sync(const unsigned mask, const __half var,
1700
1700
  __CUDA_FP16_DECL__ __half __shfl_down_sync(const unsigned mask, const __half var, const unsigned int delta, const int width = warpSize);
1701
1701
  /**
1702
1702
  * \ingroup CUDA_MATH__HALF_MISC
1703
- * \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
1704
- *
1705
- * \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with mask:
1706
- * the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
1707
- * group of width consecutive threads are able to access elements from earlier groups of threads,
1708
- * however if they attempt to access elements from later groups of threads their own value of var
1709
- * will be returned. This mode implements a butterfly addressing pattern such as is used in tree
1710
- * reduction and broadcast.
1711
- * \param[in] mask - unsigned int. Is only being read.
1712
- * \param[in] var - half. Is only being read.
1713
- * \param[in] delta - int. Is only being read.
1714
- * \param[in] width - int. Is only being read.
1715
- *
1716
- * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1717
- * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1703
+ * \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
1704
+ *
1705
+ * \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with mask:
1706
+ * the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
1707
+ * group of width consecutive threads are able to access elements from earlier groups of threads,
1708
+ * however if they attempt to access elements from later groups of threads their own value of var
1709
+ * will be returned. This mode implements a butterfly addressing pattern such as is used in tree
1710
+ * reduction and broadcast.
1711
+ * \param[in] mask - unsigned int. Is only being read.
1712
+ * \param[in] var - half. Is only being read.
1713
+ * \param[in] delta - int. Is only being read.
1714
+ * \param[in] width - int. Is only being read.
1715
+ *
1716
+ * \returns Returns the 2-byte word referenced by var from the source thread ID as half.
1717
+ * If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
1718
1718
  * \internal
1719
1719
  * \exception-guarantee no-throw guarantee
1720
1720
  * \behavior not reentrant, not thread safe
@@ -1875,13 +1875,13 @@ __CUDA_FP16_DECL__ void __stwt(__half *const ptr, const __half value);
1875
1875
  /**
1876
1876
  * \ingroup CUDA_MATH__HALF2_COMPARISON
1877
1877
  * \brief Performs half2 vector if-equal comparison.
1878
- *
1878
+ *
1879
1879
  * \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
1880
1880
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1881
1881
  * NaN inputs generate false results.
1882
- * \param[in] a - half2. Is only being read.
1883
- * \param[in] b - half2. Is only being read.
1884
- *
1882
+ * \param[in] a - half2. Is only being read.
1883
+ * \param[in] b - half2. Is only being read.
1884
+ *
1885
1885
  * \returns half2
1886
1886
  * \retval The vector result of if-equal comparison of vectors \p a and \p b.
1887
1887
  * \internal
@@ -1893,13 +1893,13 @@ __CUDA_FP16_DECL__ __half2 __heq2(const __half2 a, const __half2 b);
1893
1893
  /**
1894
1894
  * \ingroup CUDA_MATH__HALF2_COMPARISON
1895
1895
  * \brief Performs \p half2 vector not-equal comparison.
1896
- *
1896
+ *
1897
1897
  * \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
1898
1898
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1899
1899
  * NaN inputs generate false results.
1900
- * \param[in] a - half2. Is only being read.
1901
- * \param[in] b - half2. Is only being read.
1902
- *
1900
+ * \param[in] a - half2. Is only being read.
1901
+ * \param[in] b - half2. Is only being read.
1902
+ *
1903
1903
  * \returns half2
1904
1904
  * \retval The vector result of not-equal comparison of vectors \p a and \p b.
1905
1905
  * \internal
@@ -1915,8 +1915,8 @@ __CUDA_FP16_DECL__ __half2 __hne2(const __half2 a, const __half2 b);
1915
1915
  * \details Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
1916
1916
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1917
1917
  * NaN inputs generate false results.
1918
- * \param[in] a - half2. Is only being read.
1919
- * \param[in] b - half2. Is only being read.
1918
+ * \param[in] a - half2. Is only being read.
1919
+ * \param[in] b - half2. Is only being read.
1920
1920
  *
1921
1921
  * \returns half2
1922
1922
  * \retval The \p half2 result of less-equal comparison of vectors \p a and \p b.
@@ -1933,8 +1933,8 @@ __CUDA_FP16_DECL__ __half2 __hle2(const __half2 a, const __half2 b);
1933
1933
  * \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
1934
1934
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1935
1935
  * NaN inputs generate false results.
1936
- * \param[in] a - half2. Is only being read.
1937
- * \param[in] b - half2. Is only being read.
1936
+ * \param[in] a - half2. Is only being read.
1937
+ * \param[in] b - half2. Is only being read.
1938
1938
  *
1939
1939
  * \returns half2
1940
1940
  * \retval The vector result of greater-equal comparison of vectors \p a and \p b.
@@ -1951,8 +1951,8 @@ __CUDA_FP16_DECL__ __half2 __hge2(const __half2 a, const __half2 b);
1951
1951
  * \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
1952
1952
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1953
1953
  * NaN inputs generate false results.
1954
- * \param[in] a - half2. Is only being read.
1955
- * \param[in] b - half2. Is only being read.
1954
+ * \param[in] a - half2. Is only being read.
1955
+ * \param[in] b - half2. Is only being read.
1956
1956
  *
1957
1957
  * \returns half2
1958
1958
  * \retval The half2 vector result of less-than comparison of vectors \p a and \p b.
@@ -1965,13 +1965,13 @@ __CUDA_FP16_DECL__ __half2 __hlt2(const __half2 a, const __half2 b);
1965
1965
  /**
1966
1966
  * \ingroup CUDA_MATH__HALF2_COMPARISON
1967
1967
  * \brief Performs \p half2 vector greater-than comparison.
1968
- *
1968
+ *
1969
1969
  * \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
1970
1970
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1971
1971
  * NaN inputs generate false results.
1972
- * \param[in] a - half2. Is only being read.
1973
- * \param[in] b - half2. Is only being read.
1974
- *
1972
+ * \param[in] a - half2. Is only being read.
1973
+ * \param[in] b - half2. Is only being read.
1974
+ *
1975
1975
  * \returns half2
1976
1976
  * \retval The vector result of greater-than comparison of vectors \p a and \p b.
1977
1977
  * \internal
@@ -1983,13 +1983,13 @@ __CUDA_FP16_DECL__ __half2 __hgt2(const __half2 a, const __half2 b);
1983
1983
  /**
1984
1984
  * \ingroup CUDA_MATH__HALF2_COMPARISON
1985
1985
  * \brief Performs \p half2 vector unordered if-equal comparison.
1986
- *
1986
+ *
1987
1987
  * \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
1988
1988
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
1989
1989
  * NaN inputs generate true results.
1990
- * \param[in] a - half2. Is only being read.
1991
- * \param[in] b - half2. Is only being read.
1992
- *
1990
+ * \param[in] a - half2. Is only being read.
1991
+ * \param[in] b - half2. Is only being read.
1992
+ *
1993
1993
  * \returns half2
1994
1994
  * \retval The vector result of unordered if-equal comparison of vectors \p a and \p b.
1995
1995
  * \internal
@@ -2005,8 +2005,8 @@ __CUDA_FP16_DECL__ __half2 __hequ2(const __half2 a, const __half2 b);
2005
2005
  * \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
2006
2006
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2007
2007
  * NaN inputs generate true results.
2008
- * \param[in] a - half2. Is only being read.
2009
- * \param[in] b - half2. Is only being read.
2008
+ * \param[in] a - half2. Is only being read.
2009
+ * \param[in] b - half2. Is only being read.
2010
2010
  *
2011
2011
  * \returns half2
2012
2012
  * \retval The vector result of unordered not-equal comparison of vectors \p a and \p b.
@@ -2023,8 +2023,8 @@ __CUDA_FP16_DECL__ __half2 __hneu2(const __half2 a, const __half2 b);
2023
2023
  * Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
2024
2024
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2025
2025
  * NaN inputs generate true results.
2026
- * \param[in] a - half2. Is only being read.
2027
- * \param[in] b - half2. Is only being read.
2026
+ * \param[in] a - half2. Is only being read.
2027
+ * \param[in] b - half2. Is only being read.
2028
2028
  *
2029
2029
  * \returns half2
2030
2030
  * \retval The vector result of unordered less-equal comparison of vectors \p a and \p b.
@@ -2041,8 +2041,8 @@ __CUDA_FP16_DECL__ __half2 __hleu2(const __half2 a, const __half2 b);
2041
2041
  * \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
2042
2042
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2043
2043
  * NaN inputs generate true results.
2044
- * \param[in] a - half2. Is only being read.
2045
- * \param[in] b - half2. Is only being read.
2044
+ * \param[in] a - half2. Is only being read.
2045
+ * \param[in] b - half2. Is only being read.
2046
2046
  *
2047
2047
  * \returns half2
2048
2048
  * \retval The \p half2 vector result of unordered greater-equal comparison of vectors \p a and \p b.
@@ -2059,8 +2059,8 @@ __CUDA_FP16_DECL__ __half2 __hgeu2(const __half2 a, const __half2 b);
2059
2059
  * \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
2060
2060
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2061
2061
  * NaN inputs generate true results.
2062
- * \param[in] a - half2. Is only being read.
2063
- * \param[in] b - half2. Is only being read.
2062
+ * \param[in] a - half2. Is only being read.
2063
+ * \param[in] b - half2. Is only being read.
2064
2064
  *
2065
2065
  * \returns half2
2066
2066
  * \retval The vector result of unordered less-than comparison of vectors \p a and \p b.
@@ -2077,8 +2077,8 @@ __CUDA_FP16_DECL__ __half2 __hltu2(const __half2 a, const __half2 b);
2077
2077
  * \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
2078
2078
  * The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
2079
2079
  * NaN inputs generate true results.
2080
- * \param[in] a - half2. Is only being read.
2081
- * \param[in] b - half2. Is only being read.
2080
+ * \param[in] a - half2. Is only being read.
2081
+ * \param[in] b - half2. Is only being read.
2082
2082
  *
2083
2083
  * \returns half2
2084
2084
  * \retval The \p half2 vector result of unordered greater-than comparison of vectors \p a and \p b.
@@ -2093,11 +2093,11 @@ __CUDA_FP16_DECL__ __half2 __hgtu2(const __half2 a, const __half2 b);
2093
2093
  * \brief Determine whether \p half2 argument is a NaN.
2094
2094
  *
2095
2095
  * \details Determine whether each half of input \p half2 number \p a is a NaN.
2096
- * \param[in] a - half2. Is only being read.
2096
+ * \param[in] a - half2. Is only being read.
2097
2097
  *
2098
2098
  * \returns half2
2099
2099
  * \retval The half2 with the corresponding \p half results set to
2100
- * 1.0 for NaN, 0.0 otherwise.
2100
+ * 1.0 for NaN, 0.0 otherwise.
2101
2101
  * \internal
2102
2102
  * \exception-guarantee no-throw guarantee
2103
2103
  * \behavior reentrant, thread safe
@@ -2113,11 +2113,11 @@ __CUDA_FP16_DECL__ __half2 __hisnan2(const __half2 a);
2113
2113
  * \internal
2114
2114
  * \req DEEPLEARN-SRM_REQ-95
2115
2115
  * \endinternal
2116
- * \param[in] a - half2. Is only being read.
2117
- * \param[in] b - half2. Is only being read.
2116
+ * \param[in] a - half2. Is only being read.
2117
+ * \param[in] b - half2. Is only being read.
2118
2118
  *
2119
2119
  * \returns half2
2120
- * \retval The sum of vectors \p a and \p b.
2120
+ * \retval The sum of vectors \p a and \p b.
2121
2121
  * \internal
2122
2122
  * \exception-guarantee no-throw guarantee
2123
2123
  * \behavior reentrant, thread safe
@@ -2133,11 +2133,11 @@ __CUDA_FP16_DECL__ __half2 __hadd2(const __half2 a, const __half2 b);
2133
2133
  * \internal
2134
2134
  * \req DEEPLEARN-SRM_REQ-104
2135
2135
  * \endinternal
2136
- * \param[in] a - half2. Is only being read.
2137
- * \param[in] b - half2. Is only being read.
2136
+ * \param[in] a - half2. Is only being read.
2137
+ * \param[in] b - half2. Is only being read.
2138
2138
  *
2139
2139
  * \returns half2
2140
- * \retval The subtraction of vector \p b from \p a.
2140
+ * \retval The subtraction of vector \p b from \p a.
2141
2141
  * \internal
2142
2142
  * \exception-guarantee no-throw guarantee
2143
2143
  * \behavior reentrant, thread safe
@@ -2153,11 +2153,11 @@ __CUDA_FP16_DECL__ __half2 __hsub2(const __half2 a, const __half2 b);
2153
2153
  * \internal
2154
2154
  * \req DEEPLEARN-SRM_REQ-102
2155
2155
  * \endinternal
2156
- * \param[in] a - half2. Is only being read.
2157
- * \param[in] b - half2. Is only being read.
2156
+ * \param[in] a - half2. Is only being read.
2157
+ * \param[in] b - half2. Is only being read.
2158
2158
  *
2159
2159
  * \returns half2
2160
- * \retval The result of elementwise multiplying the vectors \p a and \p b.
2160
+ * \retval The result of elementwise multiplying the vectors \p a and \p b.
2161
2161
  * \internal
2162
2162
  * \exception-guarantee no-throw guarantee
2163
2163
  * \behavior reentrant, thread safe
@@ -2173,11 +2173,11 @@ __CUDA_FP16_DECL__ __half2 __hmul2(const __half2 a, const __half2 b);
2173
2173
  * \internal
2174
2174
  * \req DEEPLEARN-SRM_REQ-103
2175
2175
  * \endinternal
2176
- * \param[in] a - half2. Is only being read.
2177
- * \param[in] b - half2. Is only being read.
2176
+ * \param[in] a - half2. Is only being read.
2177
+ * \param[in] b - half2. Is only being read.
2178
2178
  *
2179
2179
  * \returns half2
2180
- * \retval The elementwise division of \p a with \p b.
2180
+ * \retval The elementwise division of \p a with \p b.
2181
2181
  * \internal
2182
2182
  * \exception-guarantee no-throw guarantee
2183
2183
  * \behavior reentrant, thread safe
@@ -2191,10 +2191,10 @@ __CUDA_FP16_DECL__ __half2 __h2div(const __half2 a, const __half2 b);
2191
2191
  *
2192
2192
  * \details Calculates the absolute value of both halves of the input \p half2 number and
2193
2193
  * returns the result.
2194
- * \param[in] a - half2. Is only being read.
2194
+ * \param[in] a - half2. Is only being read.
2195
2195
  *
2196
2196
  * \returns half2
2197
- * \retval Returns \p a with the absolute value of both halves.
2197
+ * \retval Returns \p a with the absolute value of both halves.
2198
2198
  * \internal
2199
2199
  * \exception-guarantee no-throw guarantee
2200
2200
  * \behavior reentrant, thread safe
@@ -2209,11 +2209,11 @@ __CUDA_FP16_DECL__ __half2 __habs2(const __half2 a);
2209
2209
  * \details Performs \p half2 vector add of inputs \p a and \p b, in round-to-nearest
2210
2210
  * mode, and clamps the results to range [0.0, 1.0]. NaN results are flushed to
2211
2211
  * +0.0.
2212
- * \param[in] a - half2. Is only being read.
2213
- * \param[in] b - half2. Is only being read.
2212
+ * \param[in] a - half2. Is only being read.
2213
+ * \param[in] b - half2. Is only being read.
2214
2214
  *
2215
2215
  * \returns half2
2216
- * \retval The sum of \p a and \p b, with respect to saturation.
2216
+ * \retval The sum of \p a and \p b, with respect to saturation.
2217
2217
  * \internal
2218
2218
  * \exception-guarantee no-throw guarantee
2219
2219
  * \behavior reentrant, thread safe
@@ -2228,8 +2228,8 @@ __CUDA_FP16_DECL__ __half2 __hadd2_sat(const __half2 a, const __half2 b);
2228
2228
  * \details Subtracts \p half2 input vector \p b from input vector \p a in
2229
2229
  * round-to-nearest-even mode, and clamps the results to range [0.0, 1.0]. NaN
2230
2230
  * results are flushed to +0.0.
2231
- * \param[in] a - half2. Is only being read.
2232
- * \param[in] b - half2. Is only being read.
2231
+ * \param[in] a - half2. Is only being read.
2232
+ * \param[in] b - half2. Is only being read.
2233
2233
  *
2234
2234
  * \returns half2
2235
2235
  * \retval The subtraction of vector \p b from \p a, with respect to saturation.
@@ -2247,12 +2247,12 @@ __CUDA_FP16_DECL__ __half2 __hsub2_sat(const __half2 a, const __half2 b);
2247
2247
  * \details Performs \p half2 vector multiplication of inputs \p a and \p b, in
2248
2248
  * round-to-nearest-even mode, and clamps the results to range [0.0, 1.0]. NaN
2249
2249
  * results are flushed to +0.0.
2250
- * \param[in] a - half2. Is only being read.
2251
- * \param[in] b - half2. Is only being read.
2250
+ * \param[in] a - half2. Is only being read.
2251
+ * \param[in] b - half2. Is only being read.
2252
2252
  *
2253
2253
  * \returns half2
2254
- * \retval The result of elementwise multiplication of vectors \p a and \p b,
2255
- * with respect to saturation.
2254
+ * \retval The result of elementwise multiplication of vectors \p a and \p b,
2255
+ * with respect to saturation.
2256
2256
  * \internal
2257
2257
  * \exception-guarantee no-throw guarantee
2258
2258
  * \behavior reentrant, thread safe
@@ -2270,12 +2270,12 @@ __CUDA_FP16_DECL__ __half2 __hmul2_sat(const __half2 a, const __half2 b);
2270
2270
  * \internal
2271
2271
  * \req DEEPLEARN-SRM_REQ-105
2272
2272
  * \endinternal
2273
- * \param[in] a - half2. Is only being read.
2274
- * \param[in] b - half2. Is only being read.
2275
- * \param[in] c - half2. Is only being read.
2273
+ * \param[in] a - half2. Is only being read.
2274
+ * \param[in] b - half2. Is only being read.
2275
+ * \param[in] c - half2. Is only being read.
2276
2276
  *
2277
2277
  * \returns half2
2278
- * \retval The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c.
2278
+ * \retval The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c.
2279
2279
  * \internal
2280
2280
  * \exception-guarantee no-throw guarantee
2281
2281
  * \behavior reentrant, thread safe
@@ -2291,13 +2291,13 @@ __CUDA_FP16_DECL__ __half2 __hfma2(const __half2 a, const __half2 b, const __hal
2291
2291
  * then performs a \p half2 vector add of the result with \p c,
2292
2292
  * rounding the result once in round-to-nearest-even mode, and clamps the
2293
2293
  * results to range [0.0, 1.0]. NaN results are flushed to +0.0.
2294
- * \param[in] a - half2. Is only being read.
2295
- * \param[in] b - half2. Is only being read.
2296
- * \param[in] c - half2. Is only being read.
2294
+ * \param[in] a - half2. Is only being read.
2295
+ * \param[in] b - half2. Is only being read.
2296
+ * \param[in] c - half2. Is only being read.
2297
2297
  *
2298
2298
  * \returns half2
2299
- * \retval The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c,
2300
- * with respect to saturation.
2299
+ * \retval The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c,
2300
+ * with respect to saturation.
2301
2301
  * \internal
2302
2302
  * \exception-guarantee no-throw guarantee
2303
2303
  * \behavior reentrant, thread safe
@@ -2313,10 +2313,10 @@ __CUDA_FP16_DECL__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const _
2313
2313
  * \internal
2314
2314
  * \req DEEPLEARN-SRM_REQ-101
2315
2315
  * \endinternal
2316
- * \param[in] a - half2. Is only being read.
2316
+ * \param[in] a - half2. Is only being read.
2317
2317
  *
2318
2318
  * \returns half2
2319
- * \retval Returns \p a with both halves negated.
2319
+ * \retval Returns \p a with both halves negated.
2320
2320
  * \internal
2321
2321
  * \exception-guarantee no-throw guarantee
2322
2322
  * \behavior reentrant, thread safe
@@ -2328,7 +2328,7 @@ __CUDA_FP16_DECL__ __half2 __hneg2(const __half2 a);
2328
2328
  * \brief Calculates the absolute value of input \p half number and returns the result.
2329
2329
  *
2330
2330
  * \details Calculates the absolute value of input \p half number and returns the result.
2331
- * \param[in] a - half. Is only being read.
2331
+ * \param[in] a - half. Is only being read.
2332
2332
  *
2333
2333
  * \returns half
2334
2334
  * \retval The absolute value of a.
@@ -2347,11 +2347,11 @@ __CUDA_FP16_DECL__ __half __habs(const __half a);
2347
2347
  * \internal
2348
2348
  * \req DEEPLEARN-SRM_REQ-94
2349
2349
  * \endinternal
2350
- * \param[in] a - half. Is only being read.
2351
- * \param[in] b - half. Is only being read.
2350
+ * \param[in] a - half. Is only being read.
2351
+ * \param[in] b - half. Is only being read.
2352
2352
  *
2353
2353
  * \returns half
2354
- * \retval The sum of \p a and \p b.
2354
+ * \retval The sum of \p a and \p b.
2355
2355
  * \internal
2356
2356
  * \exception-guarantee no-throw guarantee
2357
2357
  * \behavior reentrant, thread safe
@@ -2367,11 +2367,11 @@ __CUDA_FP16_DECL__ __half __hadd(const __half a, const __half b);
2367
2367
  * \internal
2368
2368
  * \req DEEPLEARN-SRM_REQ-97
2369
2369
  * \endinternal
2370
- * \param[in] a - half. Is only being read.
2371
- * \param[in] b - half. Is only being read.
2370
+ * \param[in] a - half. Is only being read.
2371
+ * \param[in] b - half. Is only being read.
2372
2372
  *
2373
2373
  * \returns half
2374
- * \retval The result of subtracting \p b from \p a.
2374
+ * \retval The result of subtracting \p b from \p a.
2375
2375
  * \internal
2376
2376
  * \exception-guarantee no-throw guarantee
2377
2377
  * \behavior reentrant, thread safe
@@ -2387,27 +2387,27 @@ __CUDA_FP16_DECL__ __half __hsub(const __half a, const __half b);
2387
2387
  * \internal
2388
2388
  * \req DEEPLEARN-SRM_REQ-99
2389
2389
  * \endinternal
2390
- * \param[in] a - half. Is only being read.
2391
- * \param[in] b - half. Is only being read.
2390
+ * \param[in] a - half. Is only being read.
2391
+ * \param[in] b - half. Is only being read.
2392
2392
  *
2393
2393
  * \returns half
2394
- * \retval The result of multiplying \p a and \p b.
2394
+ * \retval The result of multiplying \p a and \p b.
2395
2395
  */
2396
2396
  __CUDA_FP16_DECL__ __half __hmul(const __half a, const __half b);
2397
2397
  /**
2398
2398
  * \ingroup CUDA_MATH__HALF_ARITHMETIC
2399
2399
  * \brief Performs \p half division in round-to-nearest-even mode.
2400
- *
2400
+ *
2401
2401
  * \details Divides \p half input \p a by input \p b in round-to-nearest
2402
2402
  * mode.
2403
2403
  * \internal
2404
2404
  * \req DEEPLEARN-SRM_REQ-98
2405
2405
  * \endinternal
2406
- * \param[in] a - half. Is only being read.
2407
- * \param[in] b - half. Is only being read.
2408
- *
2406
+ * \param[in] a - half. Is only being read.
2407
+ * \param[in] b - half. Is only being read.
2408
+ *
2409
2409
  * \returns half
2410
- * \retval The result of dividing \p a by \p b.
2410
+ * \retval The result of dividing \p a by \p b.
2411
2411
  * \internal
2412
2412
  * \exception-guarantee no-throw guarantee
2413
2413
  * \behavior reentrant, thread safe
@@ -2421,8 +2421,8 @@ __CUDA_FP16_DECL__ __half __hdiv(const __half a, const __half b);
2421
2421
  *
2422
2422
  * \details Performs \p half add of inputs \p a and \p b, in round-to-nearest-even mode,
2423
2423
  * and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.
2424
- * \param[in] a - half. Is only being read.
2425
- * \param[in] b - half. Is only being read.
2424
+ * \param[in] a - half. Is only being read.
2425
+ * \param[in] b - half. Is only being read.
2426
2426
  *
2427
2427
  * \returns half
2428
2428
  * \retval The sum of \p a and \p b, with respect to saturation.
@@ -2440,8 +2440,8 @@ __CUDA_FP16_DECL__ __half __hadd_sat(const __half a, const __half b);
2440
2440
  * \details Subtracts \p half input \p b from input \p a in round-to-nearest
2441
2441
  * mode,
2442
2442
  * and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.
2443
- * \param[in] a - half. Is only being read.
2444
- * \param[in] b - half. Is only being read.
2443
+ * \param[in] a - half. Is only being read.
2444
+ * \param[in] b - half. Is only being read.
2445
2445
  *
2446
2446
  * \returns half
2447
2447
  * \retval The result of subtraction of \p b from \p a, with respect to saturation.
@@ -2459,8 +2459,8 @@ __CUDA_FP16_DECL__ __half __hsub_sat(const __half a, const __half b);
2459
2459
  * \details Performs \p half multiplication of inputs \p a and \p b, in round-to-nearest
2460
2460
  * mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to
2461
2461
  * +0.0.
2462
- * \param[in] a - half. Is only being read.
2463
- * \param[in] b - half. Is only being read.
2462
+ * \param[in] a - half. Is only being read.
2463
+ * \param[in] b - half. Is only being read.
2464
2464
  *
2465
2465
  * \returns half
2466
2466
  * \retval The result of multiplying \p a and \p b, with respect to saturation.
@@ -2480,13 +2480,13 @@ __CUDA_FP16_DECL__ __half __hmul_sat(const __half a, const __half b);
2480
2480
  * \internal
2481
2481
  * \req DEEPLEARN-SRM_REQ-96
2482
2482
  * \endinternal
2483
- * \param[in] a - half. Is only being read.
2484
- * \param[in] b - half. Is only being read.
2485
- * \param[in] c - half. Is only being read.
2483
+ * \param[in] a - half. Is only being read.
2484
+ * \param[in] b - half. Is only being read.
2485
+ * \param[in] c - half. Is only being read.
2486
2486
  *
2487
2487
  * \returns half
2488
2488
  * \retval The result of fused multiply-add operation on \p
2489
- * a, \p b, and \p c.
2489
+ * a, \p b, and \p c.
2490
2490
  * \internal
2491
2491
  * \exception-guarantee no-throw guarantee
2492
2492
  * \behavior reentrant, thread safe
@@ -2502,13 +2502,13 @@ __CUDA_FP16_DECL__ __half __hfma(const __half a, const __half b, const __half c)
2502
2502
  * then performs a \p half add of the result with \p c,
2503
2503
  * rounding the result once in round-to-nearest-even mode, and clamps the result
2504
2504
  * to range [0.0, 1.0]. NaN results are flushed to +0.0.
2505
- * \param[in] a - half. Is only being read.
2506
- * \param[in] b - half. Is only being read.
2507
- * \param[in] c - half. Is only being read.
2505
+ * \param[in] a - half. Is only being read.
2506
+ * \param[in] b - half. Is only being read.
2507
+ * \param[in] c - half. Is only being read.
2508
2508
  *
2509
2509
  * \returns half
2510
2510
  * \retval The result of fused multiply-add operation on \p
2511
- * a, \p b, and \p c, with respect to saturation.
2511
+ * a, \p b, and \p c, with respect to saturation.
2512
2512
  * \internal
2513
2513
  * \exception-guarantee no-throw guarantee
2514
2514
  * \behavior reentrant, thread safe
@@ -2523,7 +2523,7 @@ __CUDA_FP16_DECL__ __half __hfma_sat(const __half a, const __half b, const __hal
2523
2523
  * \internal
2524
2524
  * \req DEEPLEARN-SRM_REQ-100
2525
2525
  * \endinternal
2526
- * \param[in] a - half. Is only being read.
2526
+ * \param[in] a - half. Is only being read.
2527
2527
  *
2528
2528
  * \returns half
2529
2529
  * \retval minus a
@@ -2542,8 +2542,8 @@ __CUDA_FP16_DECL__ __half __hneg(const __half a);
2542
2542
  * The bool result is set to true only if both \p half if-equal comparisons
2543
2543
  * evaluate to true, or false otherwise.
2544
2544
  * NaN inputs generate false results.
2545
- * \param[in] a - half2. Is only being read.
2546
- * \param[in] b - half2. Is only being read.
2545
+ * \param[in] a - half2. Is only being read.
2546
+ * \param[in] b - half2. Is only being read.
2547
2547
  *
2548
2548
  * \returns bool
2549
2549
  * \retval true if both \p half results of if-equal comparison
@@ -2564,13 +2564,13 @@ __CUDA_FP16_DECL__ bool __hbeq2(const __half2 a, const __half2 b);
2564
2564
  * The bool result is set to true only if both \p half not-equal comparisons
2565
2565
  * evaluate to true, or false otherwise.
2566
2566
  * NaN inputs generate false results.
2567
- * \param[in] a - half2. Is only being read.
2568
- * \param[in] b - half2. Is only being read.
2567
+ * \param[in] a - half2. Is only being read.
2568
+ * \param[in] b - half2. Is only being read.
2569
2569
  *
2570
2570
  * \returns bool
2571
2571
  * \retval true if both \p half results of not-equal comparison
2572
- * of vectors \p a and \p b are true,
2573
- * \retval false otherwise.
2572
+ * of vectors \p a and \p b are true,
2573
+ * \retval false otherwise.
2574
2574
  * \internal
2575
2575
  * \exception-guarantee no-throw guarantee
2576
2576
  * \behavior reentrant, thread safe
@@ -2586,13 +2586,13 @@ __CUDA_FP16_DECL__ bool __hbne2(const __half2 a, const __half2 b);
2586
2586
  * The bool result is set to true only if both \p half less-equal comparisons
2587
2587
  * evaluate to true, or false otherwise.
2588
2588
  * NaN inputs generate false results.
2589
- * \param[in] a - half2. Is only being read.
2590
- * \param[in] b - half2. Is only being read.
2589
+ * \param[in] a - half2. Is only being read.
2590
+ * \param[in] b - half2. Is only being read.
2591
2591
  *
2592
2592
  * \returns bool
2593
2593
  * \retval true if both \p half results of less-equal comparison
2594
- * of vectors \p a and \p b are true;
2595
- * \retval false otherwise.
2594
+ * of vectors \p a and \p b are true;
2595
+ * \retval false otherwise.
2596
2596
  * \internal
2597
2597
  * \exception-guarantee no-throw guarantee
2598
2598
  * \behavior reentrant, thread safe
@@ -2608,13 +2608,13 @@ __CUDA_FP16_DECL__ bool __hble2(const __half2 a, const __half2 b);
2608
2608
  * The bool result is set to true only if both \p half greater-equal comparisons
2609
2609
  * evaluate to true, or false otherwise.
2610
2610
  * NaN inputs generate false results.
2611
- * \param[in] a - half2. Is only being read.
2612
- * \param[in] b - half2. Is only being read.
2611
+ * \param[in] a - half2. Is only being read.
2612
+ * \param[in] b - half2. Is only being read.
2613
2613
  *
2614
2614
  * \returns bool
2615
2615
  * \retval true if both \p half results of greater-equal
2616
- * comparison of vectors \p a and \p b are true;
2617
- * \retval false otherwise.
2616
+ * comparison of vectors \p a and \p b are true;
2617
+ * \retval false otherwise.
2618
2618
  * \internal
2619
2619
  * \exception-guarantee no-throw guarantee
2620
2620
  * \behavior reentrant, thread safe
@@ -2630,13 +2630,13 @@ __CUDA_FP16_DECL__ bool __hbge2(const __half2 a, const __half2 b);
2630
2630
  * The bool result is set to true only if both \p half less-than comparisons
2631
2631
  * evaluate to true, or false otherwise.
2632
2632
  * NaN inputs generate false results.
2633
- * \param[in] a - half2. Is only being read.
2634
- * \param[in] b - half2. Is only being read.
2633
+ * \param[in] a - half2. Is only being read.
2634
+ * \param[in] b - half2. Is only being read.
2635
2635
  *
2636
2636
  * \returns bool
2637
2637
  * \retval true if both \p half results of less-than comparison
2638
- * of vectors \p a and \p b are true;
2639
- * \retval false otherwise.
2638
+ * of vectors \p a and \p b are true;
2639
+ * \retval false otherwise.
2640
2640
  * \internal
2641
2641
  * \exception-guarantee no-throw guarantee
2642
2642
  * \behavior reentrant, thread safe
@@ -2652,13 +2652,13 @@ __CUDA_FP16_DECL__ bool __hblt2(const __half2 a, const __half2 b);
2652
2652
  * The bool result is set to true only if both \p half greater-than comparisons
2653
2653
  * evaluate to true, or false otherwise.
2654
2654
  * NaN inputs generate false results.
2655
- * \param[in] a - half2. Is only being read.
2656
- * \param[in] b - half2. Is only being read.
2657
- *
2658
- * \returns bool
2655
+ * \param[in] a - half2. Is only being read.
2656
+ * \param[in] b - half2. Is only being read.
2657
+ *
2658
+ * \returns bool
2659
2659
  * \retval true if both \p half results of greater-than
2660
- * comparison of vectors \p a and \p b are true;
2661
- * \retval false otherwise.
2660
+ * comparison of vectors \p a and \p b are true;
2661
+ * \retval false otherwise.
2662
2662
  * \internal
2663
2663
  * \exception-guarantee no-throw guarantee
2664
2664
  * \behavior reentrant, thread safe
@@ -2674,13 +2674,13 @@ __CUDA_FP16_DECL__ bool __hbgt2(const __half2 a, const __half2 b);
2674
2674
  * The bool result is set to true only if both \p half if-equal comparisons
2675
2675
  * evaluate to true, or false otherwise.
2676
2676
  * NaN inputs generate true results.
2677
- * \param[in] a - half2. Is only being read.
2678
- * \param[in] b - half2. Is only being read.
2677
+ * \param[in] a - half2. Is only being read.
2678
+ * \param[in] b - half2. Is only being read.
2679
2679
  *
2680
2680
  * \returns bool
2681
2681
  * \retval true if both \p half results of unordered if-equal
2682
- * comparison of vectors \p a and \p b are true;
2683
- * \retval false otherwise.
2682
+ * comparison of vectors \p a and \p b are true;
2683
+ * \retval false otherwise.
2684
2684
  * \internal
2685
2685
  * \exception-guarantee no-throw guarantee
2686
2686
  * \behavior reentrant, thread safe
@@ -2696,13 +2696,13 @@ __CUDA_FP16_DECL__ bool __hbequ2(const __half2 a, const __half2 b);
2696
2696
  * The bool result is set to true only if both \p half not-equal comparisons
2697
2697
  * evaluate to true, or false otherwise.
2698
2698
  * NaN inputs generate true results.
2699
- * \param[in] a - half2. Is only being read.
2700
- * \param[in] b - half2. Is only being read.
2699
+ * \param[in] a - half2. Is only being read.
2700
+ * \param[in] b - half2. Is only being read.
2701
2701
  *
2702
2702
  * \returns bool
2703
2703
  * \retval true if both \p half results of unordered not-equal
2704
2704
  * comparison of vectors \p a and \p b are true;
2705
- * \retval false otherwise.
2705
+ * \retval false otherwise.
2706
2706
  * \internal
2707
2707
  * \exception-guarantee no-throw guarantee
2708
2708
  * \behavior reentrant, thread safe
@@ -2718,13 +2718,13 @@ __CUDA_FP16_DECL__ bool __hbneu2(const __half2 a, const __half2 b);
2718
2718
  * The bool result is set to true only if both \p half less-equal comparisons
2719
2719
  * evaluate to true, or false otherwise.
2720
2720
  * NaN inputs generate true results.
2721
- * \param[in] a - half2. Is only being read.
2722
- * \param[in] b - half2. Is only being read.
2721
+ * \param[in] a - half2. Is only being read.
2722
+ * \param[in] b - half2. Is only being read.
2723
2723
  *
2724
2724
  * \returns bool
2725
2725
  * \retval true if both \p half results of unordered less-equal
2726
- * comparison of vectors \p a and \p b are true;
2727
- * \retval false otherwise.
2726
+ * comparison of vectors \p a and \p b are true;
2727
+ * \retval false otherwise.
2728
2728
  * \internal
2729
2729
  * \exception-guarantee no-throw guarantee
2730
2730
  * \behavior reentrant, thread safe
@@ -2741,13 +2741,13 @@ __CUDA_FP16_DECL__ bool __hbleu2(const __half2 a, const __half2 b);
2741
2741
  * The bool result is set to true only if both \p half greater-equal comparisons
2742
2742
  * evaluate to true, or false otherwise.
2743
2743
  * NaN inputs generate true results.
2744
- * \param[in] a - half2. Is only being read.
2745
- * \param[in] b - half2. Is only being read.
2744
+ * \param[in] a - half2. Is only being read.
2745
+ * \param[in] b - half2. Is only being read.
2746
2746
  *
2747
2747
  * \returns bool
2748
2748
  * \retval true if both \p half results of unordered
2749
- * greater-equal comparison of vectors \p a and \p b are true;
2750
- * \retval false otherwise.
2749
+ * greater-equal comparison of vectors \p a and \p b are true;
2750
+ * \retval false otherwise.
2751
2751
  * \internal
2752
2752
  * \exception-guarantee no-throw guarantee
2753
2753
  * \behavior reentrant, thread safe
@@ -2763,13 +2763,13 @@ __CUDA_FP16_DECL__ bool __hbgeu2(const __half2 a, const __half2 b);
2763
2763
  * The bool result is set to true only if both \p half less-than comparisons
2764
2764
  * evaluate to true, or false otherwise.
2765
2765
  * NaN inputs generate true results.
2766
- * \param[in] a - half2. Is only being read.
2767
- * \param[in] b - half2. Is only being read.
2766
+ * \param[in] a - half2. Is only being read.
2767
+ * \param[in] b - half2. Is only being read.
2768
2768
  *
2769
2769
  * \returns bool
2770
- * \retval true if both \p half results of unordered less-than comparison of
2771
- * vectors \p a and \p b are true;
2772
- * \retval false otherwise.
2770
+ * \retval true if both \p half results of unordered less-than comparison of
2771
+ * vectors \p a and \p b are true;
2772
+ * \retval false otherwise.
2773
2773
  * \internal
2774
2774
  * \exception-guarantee no-throw guarantee
2775
2775
  * \behavior reentrant, thread safe
@@ -2786,13 +2786,13 @@ __CUDA_FP16_DECL__ bool __hbltu2(const __half2 a, const __half2 b);
2786
2786
  * The bool result is set to true only if both \p half greater-than comparisons
2787
2787
  * evaluate to true, or false otherwise.
2788
2788
  * NaN inputs generate true results.
2789
- * \param[in] a - half2. Is only being read.
2790
- * \param[in] b - half2. Is only being read.
2789
+ * \param[in] a - half2. Is only being read.
2790
+ * \param[in] b - half2. Is only being read.
2791
2791
  *
2792
2792
  * \returns bool
2793
2793
  * \retval true if both \p half results of unordered
2794
2794
  * greater-than comparison of vectors \p a and \p b are true;
2795
- * \retval false otherwise.
2795
+ * \retval false otherwise.
2796
2796
  * \internal
2797
2797
  * \exception-guarantee no-throw guarantee
2798
2798
  * \behavior reentrant, thread safe
@@ -2805,11 +2805,11 @@ __CUDA_FP16_DECL__ bool __hbgtu2(const __half2 a, const __half2 b);
2805
2805
  *
2806
2806
  * \details Performs \p half if-equal comparison of inputs \p a and \p b.
2807
2807
  * NaN inputs generate false results.
2808
- * \param[in] a - half. Is only being read.
2809
- * \param[in] b - half. Is only being read.
2808
+ * \param[in] a - half. Is only being read.
2809
+ * \param[in] b - half. Is only being read.
2810
2810
  *
2811
2811
  * \returns bool
2812
- * \retval The boolean result of if-equal comparison of \p a and \p b.
2812
+ * \retval The boolean result of if-equal comparison of \p a and \p b.
2813
2813
  * \internal
2814
2814
  * \exception-guarantee no-throw guarantee
2815
2815
  * \behavior reentrant, thread safe
@@ -2822,8 +2822,8 @@ __CUDA_FP16_DECL__ bool __heq(const __half a, const __half b);
2822
2822
  *
2823
2823
  * \details Performs \p half not-equal comparison of inputs \p a and \p b.
2824
2824
  * NaN inputs generate false results.
2825
- * \param[in] a - half. Is only being read.
2826
- * \param[in] b - half. Is only being read.
2825
+ * \param[in] a - half. Is only being read.
2826
+ * \param[in] b - half. Is only being read.
2827
2827
  *
2828
2828
  * \returns bool
2829
2829
  * \retval The boolean result of not-equal comparison of \p a and \p b.
@@ -2839,8 +2839,8 @@ __CUDA_FP16_DECL__ bool __hne(const __half a, const __half b);
2839
2839
  *
2840
2840
  * \details Performs \p half less-equal comparison of inputs \p a and \p b.
2841
2841
  * NaN inputs generate false results.
2842
- * \param[in] a - half. Is only being read.
2843
- * \param[in] b - half. Is only being read.
2842
+ * \param[in] a - half. Is only being read.
2843
+ * \param[in] b - half. Is only being read.
2844
2844
  *
2845
2845
  * \returns bool
2846
2846
  * \retval The boolean result of less-equal comparison of \p a and \p b.
@@ -2856,8 +2856,8 @@ __CUDA_FP16_DECL__ bool __hle(const __half a, const __half b);
2856
2856
  *
2857
2857
  * \details Performs \p half greater-equal comparison of inputs \p a and \p b.
2858
2858
  * NaN inputs generate false results.
2859
- * \param[in] a - half. Is only being read.
2860
- * \param[in] b - half. Is only being read.
2859
+ * \param[in] a - half. Is only being read.
2860
+ * \param[in] b - half. Is only being read.
2861
2861
  *
2862
2862
  * \returns bool
2863
2863
  * \retval The boolean result of greater-equal comparison of \p a and \p b.
@@ -2873,8 +2873,8 @@ __CUDA_FP16_DECL__ bool __hge(const __half a, const __half b);
2873
2873
  *
2874
2874
  * \details Performs \p half less-than comparison of inputs \p a and \p b.
2875
2875
  * NaN inputs generate false results.
2876
- * \param[in] a - half. Is only being read.
2877
- * \param[in] b - half. Is only being read.
2876
+ * \param[in] a - half. Is only being read.
2877
+ * \param[in] b - half. Is only being read.
2878
2878
  *
2879
2879
  * \returns bool
2880
2880
  * \retval The boolean result of less-than comparison of \p a and \p b.
@@ -2890,8 +2890,8 @@ __CUDA_FP16_DECL__ bool __hlt(const __half a, const __half b);
2890
2890
  *
2891
2891
  * \details Performs \p half greater-than comparison of inputs \p a and \p b.
2892
2892
  * NaN inputs generate false results.
2893
- * \param[in] a - half. Is only being read.
2894
- * \param[in] b - half. Is only being read.
2893
+ * \param[in] a - half. Is only being read.
2894
+ * \param[in] b - half. Is only being read.
2895
2895
  *
2896
2896
  * \returns bool
2897
2897
  * \retval The boolean result of greater-than comparison of \p a and \p b.
@@ -2907,8 +2907,8 @@ __CUDA_FP16_DECL__ bool __hgt(const __half a, const __half b);
2907
2907
  *
2908
2908
  * \details Performs \p half if-equal comparison of inputs \p a and \p b.
2909
2909
  * NaN inputs generate true results.
2910
- * \param[in] a - half. Is only being read.
2911
- * \param[in] b - half. Is only being read.
2910
+ * \param[in] a - half. Is only being read.
2911
+ * \param[in] b - half. Is only being read.
2912
2912
  *
2913
2913
  * \returns bool
2914
2914
  * \retval The boolean result of unordered if-equal comparison of \p a and
@@ -2925,8 +2925,8 @@ __CUDA_FP16_DECL__ bool __hequ(const __half a, const __half b);
2925
2925
  *
2926
2926
  * \details Performs \p half not-equal comparison of inputs \p a and \p b.
2927
2927
  * NaN inputs generate true results.
2928
- * \param[in] a - half. Is only being read.
2929
- * \param[in] b - half. Is only being read.
2928
+ * \param[in] a - half. Is only being read.
2929
+ * \param[in] b - half. Is only being read.
2930
2930
  *
2931
2931
  * \returns bool
2932
2932
  * \retval The boolean result of unordered not-equal comparison of \p a and
@@ -2943,8 +2943,8 @@ __CUDA_FP16_DECL__ bool __hneu(const __half a, const __half b);
2943
2943
  *
2944
2944
  * \details Performs \p half less-equal comparison of inputs \p a and \p b.
2945
2945
  * NaN inputs generate true results.
2946
- * \param[in] a - half. Is only being read.
2947
- * \param[in] b - half. Is only being read.
2946
+ * \param[in] a - half. Is only being read.
2947
+ * \param[in] b - half. Is only being read.
2948
2948
  *
2949
2949
  * \returns bool
2950
2950
  * \retval The boolean result of unordered less-equal comparison of \p a and
@@ -2961,8 +2961,8 @@ __CUDA_FP16_DECL__ bool __hleu(const __half a, const __half b);
2961
2961
  *
2962
2962
  * \details Performs \p half greater-equal comparison of inputs \p a and \p b.
2963
2963
  * NaN inputs generate true results.
2964
- * \param[in] a - half. Is only being read.
2965
- * \param[in] b - half. Is only being read.
2964
+ * \param[in] a - half. Is only being read.
2965
+ * \param[in] b - half. Is only being read.
2966
2966
  *
2967
2967
  * \returns bool
2968
2968
  * \retval The boolean result of unordered greater-equal comparison of \p a
@@ -2979,8 +2979,8 @@ __CUDA_FP16_DECL__ bool __hgeu(const __half a, const __half b);
2979
2979
  *
2980
2980
  * \details Performs \p half less-than comparison of inputs \p a and \p b.
2981
2981
  * NaN inputs generate true results.
2982
- * \param[in] a - half. Is only being read.
2983
- * \param[in] b - half. Is only being read.
2982
+ * \param[in] a - half. Is only being read.
2983
+ * \param[in] b - half. Is only being read.
2984
2984
  *
2985
2985
  * \returns bool
2986
2986
  * \retval The boolean result of unordered less-than comparison of \p a and
@@ -2997,8 +2997,8 @@ __CUDA_FP16_DECL__ bool __hltu(const __half a, const __half b);
2997
2997
  *
2998
2998
  * \details Performs \p half greater-than comparison of inputs \p a and \p b.
2999
2999
  * NaN inputs generate true results.
3000
- * \param[in] a - half. Is only being read.
3001
- * \param[in] b - half. Is only being read.
3000
+ * \param[in] a - half. Is only being read.
3001
+ * \param[in] b - half. Is only being read.
3002
3002
  *
3003
3003
  * \returns bool
3004
3004
  * \retval The boolean result of unordered greater-than comparison of \p a
@@ -3014,10 +3014,10 @@ __CUDA_FP16_DECL__ bool __hgtu(const __half a, const __half b);
3014
3014
  * \brief Determine whether \p half argument is a NaN.
3015
3015
  *
3016
3016
  * \details Determine whether \p half value \p a is a NaN.
3017
- * \param[in] a - half. Is only being read.
3017
+ * \param[in] a - half. Is only being read.
3018
3018
  *
3019
3019
  * \returns bool
3020
- * \retval true iff argument is NaN.
3020
+ * \retval true iff argument is NaN.
3021
3021
  * \internal
3022
3022
  * \exception-guarantee no-throw guarantee
3023
3023
  * \behavior reentrant, thread safe
@@ -3250,7 +3250,7 @@ __CUDA_FP16_DECL__ __half2 __hcmadd(const __half2 a, const __half2 b, const __ha
3250
3250
  * \brief Calculates \p half square root in round-to-nearest-even mode.
3251
3251
  *
3252
3252
  * \details Calculates \p half square root of input \p a in round-to-nearest-even mode.
3253
- * \param[in] a - half. Is only being read.
3253
+ * \param[in] a - half. Is only being read.
3254
3254
  *
3255
3255
  * \returns half
3256
3256
  * \retval The square root of \p a.
@@ -3267,7 +3267,7 @@ __CUDA_FP16_DECL__ __half hsqrt(const __half a);
3267
3267
  *
3268
3268
  * \details Calculates \p half reciprocal square root of input \p a in round-to-nearest-even
3269
3269
  * mode.
3270
- * \param[in] a - half. Is only being read.
3270
+ * \param[in] a - half. Is only being read.
3271
3271
  *
3272
3272
  * \returns half
3273
3273
  * \retval The reciprocal square root of \p a.
@@ -3282,7 +3282,7 @@ __CUDA_FP16_DECL__ __half hrsqrt(const __half a);
3282
3282
  * \brief Calculates \p half reciprocal in round-to-nearest-even mode.
3283
3283
  *
3284
3284
  * \details Calculates \p half reciprocal of input \p a in round-to-nearest-even mode.
3285
- * \param[in] a - half. Is only being read.
3285
+ * \param[in] a - half. Is only being read.
3286
3286
  *
3287
3287
  * \returns half
3288
3288
  * \retval The reciprocal of \p a.
@@ -3298,7 +3298,7 @@ __CUDA_FP16_DECL__ __half hrcp(const __half a);
3298
3298
  *
3299
3299
  * \details Calculates \p half natural logarithm of input \p a in round-to-nearest-even
3300
3300
  * mode.
3301
- * \param[in] a - half. Is only being read.
3301
+ * \param[in] a - half. Is only being read.
3302
3302
  *
3303
3303
  * \returns half
3304
3304
  * \retval The natural logarithm of \p a.
@@ -3314,7 +3314,7 @@ __CUDA_FP16_DECL__ __half hlog(const __half a);
3314
3314
  *
3315
3315
  * \details Calculates \p half binary logarithm of input \p a in round-to-nearest-even
3316
3316
  * mode.
3317
- * \param[in] a - half. Is only being read.
3317
+ * \param[in] a - half. Is only being read.
3318
3318
  *
3319
3319
  * \returns half
3320
3320
  * \retval The binary logarithm of \p a.
@@ -3330,7 +3330,7 @@ __CUDA_FP16_DECL__ __half hlog2(const __half a);
3330
3330
  *
3331
3331
  * \details Calculates \p half decimal logarithm of input \p a in round-to-nearest-even
3332
3332
  * mode.
3333
- * \param[in] a - half. Is only being read.
3333
+ * \param[in] a - half. Is only being read.
3334
3334
  *
3335
3335
  * \returns half
3336
3336
  * \retval The decimal logarithm of \p a.
@@ -3347,7 +3347,7 @@ __CUDA_FP16_DECL__ __half hlog10(const __half a);
3347
3347
  *
3348
3348
  * \details Calculates \p half natural exponential function of input \p a in
3349
3349
  * round-to-nearest-even mode.
3350
- * \param[in] a - half. Is only being read.
3350
+ * \param[in] a - half. Is only being read.
3351
3351
  *
3352
3352
  * \returns half
3353
3353
  * \retval The natural exponential function on \p a.
@@ -3364,7 +3364,7 @@ __CUDA_FP16_DECL__ __half hexp(const __half a);
3364
3364
  *
3365
3365
  * \details Calculates \p half binary exponential function of input \p a in
3366
3366
  * round-to-nearest-even mode.
3367
- * \param[in] a - half. Is only being read.
3367
+ * \param[in] a - half. Is only being read.
3368
3368
  *
3369
3369
  * \returns half
3370
3370
  * \retval The binary exponential function on \p a.
@@ -3381,7 +3381,7 @@ __CUDA_FP16_DECL__ __half hexp2(const __half a);
3381
3381
  *
3382
3382
  * \details Calculates \p half decimal exponential function of input \p a in
3383
3383
  * round-to-nearest-even mode.
3384
- * \param[in] a - half. Is only being read.
3384
+ * \param[in] a - half. Is only being read.
3385
3385
  *
3386
3386
  * \returns half
3387
3387
  * \retval The decimal exponential function on \p a.
@@ -3396,7 +3396,7 @@ __CUDA_FP16_DECL__ __half hexp10(const __half a);
3396
3396
  * \brief Calculates \p half cosine in round-to-nearest-even mode.
3397
3397
  *
3398
3398
  * \details Calculates \p half cosine of input \p a in round-to-nearest-even mode.
3399
- * \param[in] a - half. Is only being read.
3399
+ * \param[in] a - half. Is only being read.
3400
3400
  *
3401
3401
  * \returns half
3402
3402
  * \retval The cosine of \p a.
@@ -3411,7 +3411,7 @@ __CUDA_FP16_DECL__ __half hcos(const __half a);
3411
3411
  * \brief Calculates \p half sine in round-to-nearest-even mode.
3412
3412
  *
3413
3413
  * \details Calculates \p half sine of input \p a in round-to-nearest-even mode.
3414
- * \param[in] a - half. Is only being read.
3414
+ * \param[in] a - half. Is only being read.
3415
3415
  *
3416
3416
  * \returns half
3417
3417
  * \retval The sine of \p a.
@@ -3427,7 +3427,7 @@ __CUDA_FP16_DECL__ __half hsin(const __half a);
3427
3427
  *
3428
3428
  * \details Calculates \p half2 square root of input vector \p a in round-to-nearest-even
3429
3429
  * mode.
3430
- * \param[in] a - half2. Is only being read.
3430
+ * \param[in] a - half2. Is only being read.
3431
3431
  *
3432
3432
  * \returns half2
3433
3433
  * \retval The elementwise square root on vector \p a.
@@ -3444,7 +3444,7 @@ __CUDA_FP16_DECL__ __half2 h2sqrt(const __half2 a);
3444
3444
  *
3445
3445
  * \details Calculates \p half2 reciprocal square root of input vector \p a in
3446
3446
  * round-to-nearest-even mode.
3447
- * \param[in] a - half2. Is only being read.
3447
+ * \param[in] a - half2. Is only being read.
3448
3448
  *
3449
3449
  * \returns half2
3450
3450
  * \retval The elementwise reciprocal square root on vector \p a.
@@ -3460,7 +3460,7 @@ __CUDA_FP16_DECL__ __half2 h2rsqrt(const __half2 a);
3460
3460
  *
3461
3461
  * \details Calculates \p half2 reciprocal of input vector \p a in round-to-nearest-even
3462
3462
  * mode.
3463
- * \param[in] a - half2. Is only being read.
3463
+ * \param[in] a - half2. Is only being read.
3464
3464
  *
3465
3465
  * \returns half2
3466
3466
  * \retval The elementwise reciprocal on vector \p a.
@@ -3477,7 +3477,7 @@ __CUDA_FP16_DECL__ __half2 h2rcp(const __half2 a);
3477
3477
  *
3478
3478
  * \details Calculates \p half2 natural logarithm of input vector \p a in
3479
3479
  * round-to-nearest-even mode.
3480
- * \param[in] a - half2. Is only being read.
3480
+ * \param[in] a - half2. Is only being read.
3481
3481
  *
3482
3482
  * \returns half2
3483
3483
  * \retval The elementwise natural logarithm on vector \p a.
@@ -3494,7 +3494,7 @@ __CUDA_FP16_DECL__ __half2 h2log(const __half2 a);
3494
3494
  *
3495
3495
  * \details Calculates \p half2 binary logarithm of input vector \p a in round-to-nearest-even
3496
3496
  * mode.
3497
- * \param[in] a - half2. Is only being read.
3497
+ * \param[in] a - half2. Is only being read.
3498
3498
  *
3499
3499
  * \returns half2
3500
3500
  * \retval The elementwise binary logarithm on vector \p a.
@@ -3511,7 +3511,7 @@ __CUDA_FP16_DECL__ __half2 h2log2(const __half2 a);
3511
3511
  *
3512
3512
  * \details Calculates \p half2 decimal logarithm of input vector \p a in
3513
3513
  * round-to-nearest-even mode.
3514
- * \param[in] a - half2. Is only being read.
3514
+ * \param[in] a - half2. Is only being read.
3515
3515
  *
3516
3516
  * \returns half2
3517
3517
  * \retval The elementwise decimal logarithm on vector \p a.
@@ -3528,7 +3528,7 @@ __CUDA_FP16_DECL__ __half2 h2log10(const __half2 a);
3528
3528
  *
3529
3529
  * \details Calculates \p half2 exponential function of input vector \p a in
3530
3530
  * round-to-nearest-even mode.
3531
- * \param[in] a - half2. Is only being read.
3531
+ * \param[in] a - half2. Is only being read.
3532
3532
  *
3533
3533
  * \returns half2
3534
3534
  * \retval The elementwise exponential function on vector \p a.
@@ -3545,7 +3545,7 @@ __CUDA_FP16_DECL__ __half2 h2exp(const __half2 a);
3545
3545
  *
3546
3546
  * \details Calculates \p half2 binary exponential function of input vector \p a in
3547
3547
  * round-to-nearest-even mode.
3548
- * \param[in] a - half2. Is only being read.
3548
+ * \param[in] a - half2. Is only being read.
3549
3549
  *
3550
3550
  * \returns half2
3551
3551
  * \retval The elementwise binary exponential function on vector \p a.
@@ -3559,11 +3559,11 @@ __CUDA_FP16_DECL__ __half2 h2exp2(const __half2 a);
3559
3559
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3560
3560
  * \brief Calculates \p half2 vector decimal exponential function in
3561
3561
  * round-to-nearest-even mode.
3562
- *
3562
+ *
3563
3563
  * \details Calculates \p half2 decimal exponential function of input vector \p a in
3564
3564
  * round-to-nearest-even mode.
3565
- * \param[in] a - half2. Is only being read.
3566
- *
3565
+ * \param[in] a - half2. Is only being read.
3566
+ *
3567
3567
  * \returns half2
3568
3568
  * \retval The elementwise decimal exponential function on vector \p a.
3569
3569
  * \internal
@@ -3575,11 +3575,11 @@ __CUDA_FP16_DECL__ __half2 h2exp10(const __half2 a);
3575
3575
  /**
3576
3576
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3577
3577
  * \brief Calculates \p half2 vector cosine in round-to-nearest-even mode.
3578
- *
3578
+ *
3579
3579
  * \details Calculates \p half2 cosine of input vector \p a in round-to-nearest-even
3580
3580
  * mode.
3581
- * \param[in] a - half2. Is only being read.
3582
- *
3581
+ * \param[in] a - half2. Is only being read.
3582
+ *
3583
3583
  * \returns half2
3584
3584
  * \retval The elementwise cosine on vector \p a.
3585
3585
  * \internal
@@ -3591,10 +3591,10 @@ __CUDA_FP16_DECL__ __half2 h2cos(const __half2 a);
3591
3591
  /**
3592
3592
  * \ingroup CUDA_MATH__HALF2_FUNCTIONS
3593
3593
  * \brief Calculates \p half2 vector sine in round-to-nearest-even mode.
3594
- *
3594
+ *
3595
3595
  * \details Calculates \p half2 sine of input vector \p a in round-to-nearest-even mode.
3596
- * \param[in] a - half2. Is only being read.
3597
- *
3596
+ * \param[in] a - half2. Is only being read.
3597
+ *
3598
3598
  * \returns half2
3599
3599
  * \retval The elementwise sine on vector \p a.
3600
3600
  * \internal