numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -99,6 +99,9 @@
|
|
99
99
|
#ifndef __CUDA_FP16_H__
|
100
100
|
#define __CUDA_FP16_H__
|
101
101
|
|
102
|
+
#define ___CUDA_FP16_STRINGIFY_INNERMOST(x) #x
|
103
|
+
#define __CUDA_FP16_STRINGIFY(x) ___CUDA_FP16_STRINGIFY_INNERMOST(x)
|
104
|
+
|
102
105
|
#if defined(__cplusplus)
|
103
106
|
#if defined(__CUDACC__)
|
104
107
|
#define __CUDA_FP16_DECL__ static __device__ __inline__
|
@@ -112,33 +115,33 @@
|
|
112
115
|
/* Forward-declaration of structures defined in "cuda_fp16.hpp" */
|
113
116
|
|
114
117
|
/**
|
115
|
-
* \brief half datatype
|
116
|
-
*
|
117
|
-
* \details This structure implements the datatype for storing
|
118
|
-
* half-precision floating-point numbers. The structure implements
|
119
|
-
* assignment operators and type conversions.
|
120
|
-
* 16 bits are being used in total: 1 sign bit, 5 bits for the exponent,
|
121
|
-
* and the significand is being stored in 10 bits.
|
122
|
-
* The total precision is 11 bits. There are 15361 representable
|
123
|
-
* numbers within the interval [0.0, 1.0], endpoints included.
|
124
|
-
* On average we have log10(2**11) ~ 3.311 decimal digits.
|
125
|
-
*
|
118
|
+
* \brief half datatype
|
119
|
+
*
|
120
|
+
* \details This structure implements the datatype for storing
|
121
|
+
* half-precision floating-point numbers. The structure implements
|
122
|
+
* assignment operators and type conversions.
|
123
|
+
* 16 bits are being used in total: 1 sign bit, 5 bits for the exponent,
|
124
|
+
* and the significand is being stored in 10 bits.
|
125
|
+
* The total precision is 11 bits. There are 15361 representable
|
126
|
+
* numbers within the interval [0.0, 1.0], endpoints included.
|
127
|
+
* On average we have log10(2**11) ~ 3.311 decimal digits.
|
128
|
+
*
|
126
129
|
* \internal
|
127
|
-
* \req IEEE 754-2008 compliant implementation of half-precision
|
128
|
-
* floating-point numbers.
|
130
|
+
* \req IEEE 754-2008 compliant implementation of half-precision
|
131
|
+
* floating-point numbers.
|
129
132
|
* \endinternal
|
130
133
|
*/
|
131
134
|
struct __half;
|
132
135
|
|
133
136
|
/**
|
134
137
|
* \brief half2 datatype
|
135
|
-
*
|
136
|
-
* \details This structure implements the datatype for storing two
|
137
|
-
* half-precision floating-point numbers.
|
138
|
-
* The structure implements assignment operators and type conversions.
|
139
|
-
*
|
138
|
+
*
|
139
|
+
* \details This structure implements the datatype for storing two
|
140
|
+
* half-precision floating-point numbers.
|
141
|
+
* The structure implements assignment operators and type conversions.
|
142
|
+
*
|
140
143
|
* \internal
|
141
|
-
* \req Vectorified version of half.
|
144
|
+
* \req Vectorified version of half.
|
142
145
|
* \endinternal
|
143
146
|
*/
|
144
147
|
struct __half2;
|
@@ -151,7 +154,7 @@ struct __half2;
|
|
151
154
|
* \details Converts double number \p a to half precision in round-to-nearest-even mode.
|
152
155
|
* \param[in] a - double. Is only being read.
|
153
156
|
* \returns half
|
154
|
-
* \
|
157
|
+
* - \p a converted to half.
|
155
158
|
* \internal
|
156
159
|
* \exception-guarantee no-throw guarantee
|
157
160
|
* \behavior reentrant, thread safe
|
@@ -161,12 +164,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __double2half(const double a);
|
|
161
164
|
/**
|
162
165
|
* \ingroup CUDA_MATH__HALF_MISC
|
163
166
|
* \brief Converts float number to half precision in round-to-nearest-even mode
|
164
|
-
* and returns \p half with converted value.
|
165
|
-
*
|
166
|
-
* \details Converts float number \p a to half precision in round-to-nearest-even mode.
|
167
|
-
* \param[in] a - float. Is only being read.
|
167
|
+
* and returns \p half with converted value.
|
168
|
+
*
|
169
|
+
* \details Converts float number \p a to half precision in round-to-nearest-even mode.
|
170
|
+
* \param[in] a - float. Is only being read.
|
168
171
|
* \returns half
|
169
|
-
* \
|
172
|
+
* - \p a converted to half.
|
170
173
|
* \internal
|
171
174
|
* \exception-guarantee no-throw guarantee
|
172
175
|
* \behavior reentrant, thread safe
|
@@ -179,9 +182,9 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half(const float a);
|
|
179
182
|
* and returns \p half with converted value.
|
180
183
|
*
|
181
184
|
* \details Converts float number \p a to half precision in round-to-nearest-even mode.
|
182
|
-
* \param[in] a - float. Is only being read.
|
185
|
+
* \param[in] a - float. Is only being read.
|
183
186
|
* \returns half
|
184
|
-
* \
|
187
|
+
* - \p a converted to half.
|
185
188
|
* \internal
|
186
189
|
* \exception-guarantee no-throw guarantee
|
187
190
|
* \behavior reentrant, thread safe
|
@@ -192,11 +195,11 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rn(const float a);
|
|
192
195
|
* \ingroup CUDA_MATH__HALF_MISC
|
193
196
|
* \brief Converts float number to half precision in round-towards-zero mode
|
194
197
|
* and returns \p half with converted value.
|
195
|
-
*
|
198
|
+
*
|
196
199
|
* \details Converts float number \p a to half precision in round-towards-zero mode.
|
197
|
-
* \param[in] a - float. Is only being read.
|
200
|
+
* \param[in] a - float. Is only being read.
|
198
201
|
* \returns half
|
199
|
-
* \
|
202
|
+
* - \p a converted to half.
|
200
203
|
* \internal
|
201
204
|
* \exception-guarantee no-throw guarantee
|
202
205
|
* \behavior reentrant, thread safe
|
@@ -207,12 +210,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rz(const float a);
|
|
207
210
|
* \ingroup CUDA_MATH__HALF_MISC
|
208
211
|
* \brief Converts float number to half precision in round-down mode
|
209
212
|
* and returns \p half with converted value.
|
210
|
-
*
|
213
|
+
*
|
211
214
|
* \details Converts float number \p a to half precision in round-down mode.
|
212
|
-
* \param[in] a - float. Is only being read.
|
213
|
-
*
|
215
|
+
* \param[in] a - float. Is only being read.
|
216
|
+
*
|
214
217
|
* \returns half
|
215
|
-
* \
|
218
|
+
* - \p a converted to half.
|
216
219
|
* \internal
|
217
220
|
* \exception-guarantee no-throw guarantee
|
218
221
|
* \behavior reentrant, thread safe
|
@@ -223,12 +226,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_rd(const float a);
|
|
223
226
|
* \ingroup CUDA_MATH__HALF_MISC
|
224
227
|
* \brief Converts float number to half precision in round-up mode
|
225
228
|
* and returns \p half with converted value.
|
226
|
-
*
|
229
|
+
*
|
227
230
|
* \details Converts float number \p a to half precision in round-up mode.
|
228
|
-
* \param[in] a - float. Is only being read.
|
229
|
-
*
|
231
|
+
* \param[in] a - float. Is only being read.
|
232
|
+
*
|
230
233
|
* \returns half
|
231
|
-
* \
|
234
|
+
* - \p a converted to half.
|
232
235
|
* \internal
|
233
236
|
* \exception-guarantee no-throw guarantee
|
234
237
|
* \behavior reentrant, thread safe
|
@@ -238,12 +241,12 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __float2half_ru(const float a);
|
|
238
241
|
/**
|
239
242
|
* \ingroup CUDA_MATH__HALF_MISC
|
240
243
|
* \brief Converts \p half number to float.
|
241
|
-
*
|
244
|
+
*
|
242
245
|
* \details Converts half number \p a to float.
|
243
|
-
* \param[in] a - float. Is only being read.
|
244
|
-
*
|
246
|
+
* \param[in] a - float. Is only being read.
|
247
|
+
*
|
245
248
|
* \returns float
|
246
|
-
* \
|
249
|
+
* - \p a converted to float.
|
247
250
|
* \internal
|
248
251
|
* \exception-guarantee no-throw guarantee
|
249
252
|
* \behavior reentrant, thread safe
|
@@ -257,10 +260,10 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float __half2float(const __half a);
|
|
257
260
|
*
|
258
261
|
* \details Converts input \p a to half precision in round-to-nearest-even mode and
|
259
262
|
* populates both halves of \p half2 with converted value.
|
260
|
-
* \param[in] a - float. Is only being read.
|
263
|
+
* \param[in] a - float. Is only being read.
|
261
264
|
*
|
262
265
|
* \returns half2
|
263
|
-
*
|
266
|
+
* - The \p half2 value with both halves equal to the converted half
|
264
267
|
* precision number.
|
265
268
|
* \internal
|
266
269
|
* \exception-guarantee no-throw guarantee
|
@@ -277,11 +280,11 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __float2half2_rn(const float a);
|
|
277
280
|
* and combines the results into one \p half2 number. Low 16 bits of the return
|
278
281
|
* value correspond to the input \p a, high 16 bits correspond to the input \p
|
279
282
|
* b.
|
280
|
-
* \param[in] a - float. Is only being read.
|
281
|
-
* \param[in] b - float. Is only being read.
|
282
|
-
*
|
283
|
+
* \param[in] a - float. Is only being read.
|
284
|
+
* \param[in] b - float. Is only being read.
|
285
|
+
*
|
283
286
|
* \returns half2
|
284
|
-
*
|
287
|
+
* - The \p half2 value with corresponding halves equal to the
|
285
288
|
* converted input floats.
|
286
289
|
* \internal
|
287
290
|
* \exception-guarantee no-throw guarantee
|
@@ -292,13 +295,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __floats2half2_rn(const float a, const flo
|
|
292
295
|
/**
|
293
296
|
* \ingroup CUDA_MATH__HALF_MISC
|
294
297
|
* \brief Converts low 16 bits of \p half2 to float and returns the result
|
295
|
-
*
|
298
|
+
*
|
296
299
|
* \details Converts low 16 bits of \p half2 input \p a to 32-bit floating-point number
|
297
300
|
* and returns the result.
|
298
|
-
* \param[in] a - half2. Is only being read.
|
299
|
-
*
|
301
|
+
* \param[in] a - half2. Is only being read.
|
302
|
+
*
|
300
303
|
* \returns float
|
301
|
-
*
|
304
|
+
* - The low 16 bits of \p a converted to float.
|
302
305
|
* \internal
|
303
306
|
* \exception-guarantee no-throw guarantee
|
304
307
|
* \behavior reentrant, thread safe
|
@@ -308,34 +311,132 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float __low2float(const __half2 a);
|
|
308
311
|
/**
|
309
312
|
* \ingroup CUDA_MATH__HALF_MISC
|
310
313
|
* \brief Converts high 16 bits of \p half2 to float and returns the result
|
311
|
-
*
|
314
|
+
*
|
312
315
|
* \details Converts high 16 bits of \p half2 input \p a to 32-bit floating-point number
|
313
316
|
* and returns the result.
|
314
|
-
* \param[in] a - half2. Is only being read.
|
315
|
-
*
|
317
|
+
* \param[in] a - half2. Is only being read.
|
318
|
+
*
|
316
319
|
* \returns float
|
317
|
-
*
|
320
|
+
* - The high 16 bits of \p a converted to float.
|
318
321
|
* \internal
|
319
322
|
* \exception-guarantee no-throw guarantee
|
320
323
|
* \behavior reentrant, thread safe
|
321
324
|
* \endinternal
|
322
325
|
*/
|
323
326
|
__CUDA_HOSTDEVICE_FP16_DECL__ float __high2float(const __half2 a);
|
327
|
+
/**
|
328
|
+
* \ingroup CUDA_MATH__HALF_MISC
|
329
|
+
* \brief Convert a half to a signed short integer in round-towards-zero mode.
|
330
|
+
*
|
331
|
+
* \details Convert the half-precision floating-point value \p h to a signed short
|
332
|
+
* integer in round-towards-zero mode. NaN inputs are converted to 0.
|
333
|
+
* \param[in] h - half. Is only being read.
|
334
|
+
*
|
335
|
+
* \returns short int
|
336
|
+
* - \p h converted to a signed short integer.
|
337
|
+
* \internal
|
338
|
+
* \exception-guarantee no-throw guarantee
|
339
|
+
* \behavior reentrant, thread safe
|
340
|
+
* \endinternal
|
341
|
+
*/
|
342
|
+
__CUDA_HOSTDEVICE_FP16_DECL__ short int __half2short_rz(const __half h);
|
343
|
+
/**
|
344
|
+
* \ingroup CUDA_MATH__HALF_MISC
|
345
|
+
* \brief Convert a half to an unsigned short integer in round-towards-zero
|
346
|
+
* mode.
|
347
|
+
*
|
348
|
+
* \details Convert the half-precision floating-point value \p h to an unsigned short
|
349
|
+
* integer in round-towards-zero mode. NaN inputs are converted to 0.
|
350
|
+
* \param[in] h - half. Is only being read.
|
351
|
+
*
|
352
|
+
* \returns unsigned short int
|
353
|
+
* - \p h converted to an unsigned short integer.
|
354
|
+
* \internal
|
355
|
+
* \exception-guarantee no-throw guarantee
|
356
|
+
* \behavior reentrant, thread safe
|
357
|
+
* \endinternal
|
358
|
+
*/
|
359
|
+
__CUDA_HOSTDEVICE_FP16_DECL__ unsigned short int __half2ushort_rz(const __half h);
|
360
|
+
/**
|
361
|
+
* \ingroup CUDA_MATH__HALF_MISC
|
362
|
+
* \brief Convert a half to a signed integer in round-towards-zero mode.
|
363
|
+
*
|
364
|
+
* \details Convert the half-precision floating-point value \p h to a signed integer in
|
365
|
+
* round-towards-zero mode. NaN inputs are converted to 0.
|
366
|
+
* \param[in] h - half. Is only being read.
|
367
|
+
*
|
368
|
+
* \returns int
|
369
|
+
* - \p h converted to a signed integer.
|
370
|
+
* \internal
|
371
|
+
* \exception-guarantee no-throw guarantee
|
372
|
+
* \behavior reentrant, thread safe
|
373
|
+
* \endinternal
|
374
|
+
*/
|
375
|
+
__CUDA_HOSTDEVICE_FP16_DECL__ int __half2int_rz(const __half h);
|
376
|
+
/**
|
377
|
+
* \ingroup CUDA_MATH__HALF_MISC
|
378
|
+
* \brief Convert a half to an unsigned integer in round-towards-zero mode.
|
379
|
+
*
|
380
|
+
* \details Convert the half-precision floating-point value \p h to an unsigned integer
|
381
|
+
* in round-towards-zero mode. NaN inputs are converted to 0.
|
382
|
+
* \param[in] h - half. Is only being read.
|
383
|
+
*
|
384
|
+
* \returns unsigned int
|
385
|
+
* - \p h converted to an unsigned integer.
|
386
|
+
* \internal
|
387
|
+
* \exception-guarantee no-throw guarantee
|
388
|
+
* \behavior reentrant, thread safe
|
389
|
+
* \endinternal
|
390
|
+
*/
|
391
|
+
__CUDA_HOSTDEVICE_FP16_DECL__ unsigned int __half2uint_rz(const __half h);
|
392
|
+
/**
|
393
|
+
* \ingroup CUDA_MATH__HALF_MISC
|
394
|
+
* \brief Convert a half to a signed 64-bit integer in round-towards-zero mode.
|
395
|
+
*
|
396
|
+
* \details Convert the half-precision floating-point value \p h to a signed 64-bit
|
397
|
+
* integer in round-towards-zero mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
|
398
|
+
* \param[in] h - half. Is only being read.
|
399
|
+
*
|
400
|
+
* \returns long long int
|
401
|
+
* - \p h converted to a signed 64-bit integer.
|
402
|
+
* \internal
|
403
|
+
* \exception-guarantee no-throw guarantee
|
404
|
+
* \behavior reentrant, thread safe
|
405
|
+
* \endinternal
|
406
|
+
*/
|
407
|
+
__CUDA_HOSTDEVICE_FP16_DECL__ long long int __half2ll_rz(const __half h);
|
408
|
+
/**
|
409
|
+
* \ingroup CUDA_MATH__HALF_MISC
|
410
|
+
* \brief Convert a half to an unsigned 64-bit integer in round-towards-zero
|
411
|
+
* mode.
|
412
|
+
*
|
413
|
+
* \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
|
414
|
+
* integer in round-towards-zero mode. NaN inputs return 0x8000000000000000.
|
415
|
+
* \param[in] h - half. Is only being read.
|
416
|
+
*
|
417
|
+
* \returns unsigned long long int
|
418
|
+
* - \p h converted to an unsigned 64-bit integer.
|
419
|
+
* \internal
|
420
|
+
* \exception-guarantee no-throw guarantee
|
421
|
+
* \behavior reentrant, thread safe
|
422
|
+
* \endinternal
|
423
|
+
*/
|
424
|
+
__CUDA_HOSTDEVICE_FP16_DECL__ unsigned long long int __half2ull_rz(const __half h);
|
324
425
|
|
325
426
|
#if defined(__CUDACC__)
|
326
427
|
/**
|
327
428
|
* \ingroup CUDA_MATH__HALF_MISC
|
328
429
|
* \brief Converts both components of float2 number to half precision in
|
329
430
|
* round-to-nearest-even mode and returns \p half2 with converted values.
|
330
|
-
*
|
431
|
+
*
|
331
432
|
* \details Converts both components of float2 to half precision in round-to-nearest
|
332
433
|
* mode and combines the results into one \p half2 number. Low 16 bits of the
|
333
434
|
* return value correspond to \p a.x and high 16 bits of the return value
|
334
435
|
* correspond to \p a.y.
|
335
|
-
* \param[in] a - float2. Is only being read.
|
336
|
-
*
|
436
|
+
* \param[in] a - float2. Is only being read.
|
437
|
+
*
|
337
438
|
* \returns half2
|
338
|
-
*
|
439
|
+
* - The \p half2 which has corresponding halves equal to the
|
339
440
|
* converted float2 components.
|
340
441
|
* \internal
|
341
442
|
* \exception-guarantee no-throw guarantee
|
@@ -346,13 +447,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half2 __float22half2_rn(const float2 a);
|
|
346
447
|
/**
|
347
448
|
* \ingroup CUDA_MATH__HALF_MISC
|
348
449
|
* \brief Converts both halves of \p half2 to float2 and returns the result.
|
349
|
-
*
|
450
|
+
*
|
350
451
|
* \details Converts both halves of \p half2 input \p a to float2 and returns the
|
351
452
|
* result.
|
352
|
-
* \param[in] a - half2. Is only being read.
|
353
|
-
*
|
453
|
+
* \param[in] a - half2. Is only being read.
|
454
|
+
*
|
354
455
|
* \returns float2
|
355
|
-
* \
|
456
|
+
* - \p a converted to float2.
|
356
457
|
* \internal
|
357
458
|
* \exception-guarantee no-throw guarantee
|
358
459
|
* \behavior reentrant, thread safe
|
@@ -362,13 +463,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float2 __half22float2(const __half2 a);
|
|
362
463
|
/**
|
363
464
|
* \ingroup CUDA_MATH__HALF_MISC
|
364
465
|
* \brief Convert a half to a signed integer in round-to-nearest-even mode.
|
365
|
-
*
|
466
|
+
*
|
366
467
|
* \details Convert the half-precision floating-point value \p h to a signed integer in
|
367
|
-
* round-to-nearest-even mode.
|
368
|
-
* \param[in] h - half. Is only being read.
|
369
|
-
*
|
468
|
+
* round-to-nearest-even mode. NaN inputs are converted to 0.
|
469
|
+
* \param[in] h - half. Is only being read.
|
470
|
+
*
|
370
471
|
* \returns int
|
371
|
-
* \
|
472
|
+
* - \p h converted to a signed integer.
|
372
473
|
* \internal
|
373
474
|
* \exception-guarantee no-throw guarantee
|
374
475
|
* \behavior reentrant, thread safe
|
@@ -377,30 +478,14 @@ __CUDA_HOSTDEVICE_FP16_DECL__ float2 __half22float2(const __half2 a);
|
|
377
478
|
__CUDA_FP16_DECL__ int __half2int_rn(const __half h);
|
378
479
|
/**
|
379
480
|
* \ingroup CUDA_MATH__HALF_MISC
|
380
|
-
* \brief Convert a half to a signed integer in round-towards-zero mode.
|
381
|
-
*
|
382
|
-
* \details Convert the half-precision floating-point value \p h to a signed integer in
|
383
|
-
* round-towards-zero mode.
|
384
|
-
* \param[in] h - half. Is only being read.
|
385
|
-
*
|
386
|
-
* \returns int
|
387
|
-
* \retval h converted to a signed integer.
|
388
|
-
* \internal
|
389
|
-
* \exception-guarantee no-throw guarantee
|
390
|
-
* \behavior reentrant, thread safe
|
391
|
-
* \endinternal
|
392
|
-
*/
|
393
|
-
__CUDA_HOSTDEVICE_FP16_DECL__ int __half2int_rz(const __half h);
|
394
|
-
/**
|
395
|
-
* \ingroup CUDA_MATH__HALF_MISC
|
396
481
|
* \brief Convert a half to a signed integer in round-down mode.
|
397
|
-
*
|
482
|
+
*
|
398
483
|
* \details Convert the half-precision floating-point value \p h to a signed integer in
|
399
|
-
* round-down mode.
|
400
|
-
* \param[in] h - half. Is only being read.
|
401
|
-
*
|
484
|
+
* round-down mode. NaN inputs are converted to 0.
|
485
|
+
* \param[in] h - half. Is only being read.
|
486
|
+
*
|
402
487
|
* \returns int
|
403
|
-
* \
|
488
|
+
* - \p h converted to a signed integer.
|
404
489
|
* \internal
|
405
490
|
* \exception-guarantee no-throw guarantee
|
406
491
|
* \behavior reentrant, thread safe
|
@@ -410,13 +495,13 @@ __CUDA_FP16_DECL__ int __half2int_rd(const __half h);
|
|
410
495
|
/**
|
411
496
|
* \ingroup CUDA_MATH__HALF_MISC
|
412
497
|
* \brief Convert a half to a signed integer in round-up mode.
|
413
|
-
*
|
498
|
+
*
|
414
499
|
* \details Convert the half-precision floating-point value \p h to a signed integer in
|
415
|
-
* round-up mode.
|
416
|
-
* \param[in] h - half. Is only being read.
|
417
|
-
*
|
500
|
+
* round-up mode. NaN inputs are converted to 0.
|
501
|
+
* \param[in] h - half. Is only being read.
|
502
|
+
*
|
418
503
|
* \returns int
|
419
|
-
* \
|
504
|
+
* - \p h converted to a signed integer.
|
420
505
|
* \internal
|
421
506
|
* \exception-guarantee no-throw guarantee
|
422
507
|
* \behavior reentrant, thread safe
|
@@ -427,13 +512,13 @@ __CUDA_FP16_DECL__ int __half2int_ru(const __half h);
|
|
427
512
|
/**
|
428
513
|
* \ingroup CUDA_MATH__HALF_MISC
|
429
514
|
* \brief Convert a signed integer to a half in round-to-nearest-even mode.
|
430
|
-
*
|
515
|
+
*
|
431
516
|
* \details Convert the signed integer value \p i to a half-precision floating-point
|
432
517
|
* value in round-to-nearest-even mode.
|
433
|
-
* \param[in] i - int. Is only being read.
|
434
|
-
*
|
518
|
+
* \param[in] i - int. Is only being read.
|
519
|
+
*
|
435
520
|
* \returns half
|
436
|
-
* \
|
521
|
+
* - \p i converted to half.
|
437
522
|
* \internal
|
438
523
|
* \exception-guarantee no-throw guarantee
|
439
524
|
* \behavior reentrant, thread safe
|
@@ -443,13 +528,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __int2half_rn(const int i);
|
|
443
528
|
/**
|
444
529
|
* \ingroup CUDA_MATH__HALF_MISC
|
445
530
|
* \brief Convert a signed integer to a half in round-towards-zero mode.
|
446
|
-
*
|
531
|
+
*
|
447
532
|
* \details Convert the signed integer value \p i to a half-precision floating-point
|
448
533
|
* value in round-towards-zero mode.
|
449
|
-
* \param[in] i - int. Is only being read.
|
450
|
-
*
|
534
|
+
* \param[in] i - int. Is only being read.
|
535
|
+
*
|
451
536
|
* \returns half
|
452
|
-
* \
|
537
|
+
* - \p i converted to half.
|
453
538
|
* \internal
|
454
539
|
* \exception-guarantee no-throw guarantee
|
455
540
|
* \behavior reentrant, thread safe
|
@@ -459,13 +544,13 @@ __CUDA_FP16_DECL__ __half __int2half_rz(const int i);
|
|
459
544
|
/**
|
460
545
|
* \ingroup CUDA_MATH__HALF_MISC
|
461
546
|
* \brief Convert a signed integer to a half in round-down mode.
|
462
|
-
*
|
547
|
+
*
|
463
548
|
* \details Convert the signed integer value \p i to a half-precision floating-point
|
464
549
|
* value in round-down mode.
|
465
|
-
* \param[in] i - int. Is only being read.
|
466
|
-
*
|
550
|
+
* \param[in] i - int. Is only being read.
|
551
|
+
*
|
467
552
|
* \returns half
|
468
|
-
* \
|
553
|
+
* - \p i converted to half.
|
469
554
|
* \internal
|
470
555
|
* \exception-guarantee no-throw guarantee
|
471
556
|
* \behavior reentrant, thread safe
|
@@ -475,13 +560,13 @@ __CUDA_FP16_DECL__ __half __int2half_rd(const int i);
|
|
475
560
|
/**
|
476
561
|
* \ingroup CUDA_MATH__HALF_MISC
|
477
562
|
* \brief Convert a signed integer to a half in round-up mode.
|
478
|
-
*
|
563
|
+
*
|
479
564
|
* \details Convert the signed integer value \p i to a half-precision floating-point
|
480
565
|
* value in round-up mode.
|
481
|
-
* \param[in] i - int. Is only being read.
|
482
|
-
*
|
566
|
+
* \param[in] i - int. Is only being read.
|
567
|
+
*
|
483
568
|
* \returns half
|
484
|
-
* \
|
569
|
+
* - \p i converted to half.
|
485
570
|
* \internal
|
486
571
|
* \exception-guarantee no-throw guarantee
|
487
572
|
* \behavior reentrant, thread safe
|
@@ -493,13 +578,13 @@ __CUDA_FP16_DECL__ __half __int2half_ru(const int i);
|
|
493
578
|
* \ingroup CUDA_MATH__HALF_MISC
|
494
579
|
* \brief Convert a half to a signed short integer in round-to-nearest-even
|
495
580
|
* mode.
|
496
|
-
*
|
581
|
+
*
|
497
582
|
* \details Convert the half-precision floating-point value \p h to a signed short
|
498
|
-
* integer in round-to-nearest-even mode.
|
499
|
-
* \param[in] h - half. Is only being read.
|
500
|
-
*
|
583
|
+
* integer in round-to-nearest-even mode. NaN inputs are converted to 0.
|
584
|
+
* \param[in] h - half. Is only being read.
|
585
|
+
*
|
501
586
|
* \returns short int
|
502
|
-
* \
|
587
|
+
* - \p h converted to a signed short integer.
|
503
588
|
* \internal
|
504
589
|
* \exception-guarantee no-throw guarantee
|
505
590
|
* \behavior reentrant, thread safe
|
@@ -508,30 +593,14 @@ __CUDA_FP16_DECL__ __half __int2half_ru(const int i);
|
|
508
593
|
__CUDA_FP16_DECL__ short int __half2short_rn(const __half h);
|
509
594
|
/**
|
510
595
|
* \ingroup CUDA_MATH__HALF_MISC
|
511
|
-
* \brief Convert a half to a signed short integer in round-towards-zero mode.
|
512
|
-
*
|
513
|
-
* \details Convert the half-precision floating-point value \p h to a signed short
|
514
|
-
* integer in round-towards-zero mode.
|
515
|
-
* \param[in] h - half. Is only being read.
|
516
|
-
*
|
517
|
-
* \returns short int
|
518
|
-
* \retval h converted to a signed short integer.
|
519
|
-
* \internal
|
520
|
-
* \exception-guarantee no-throw guarantee
|
521
|
-
* \behavior reentrant, thread safe
|
522
|
-
* \endinternal
|
523
|
-
*/
|
524
|
-
__CUDA_HOSTDEVICE_FP16_DECL__ short int __half2short_rz(const __half h);
|
525
|
-
/**
|
526
|
-
* \ingroup CUDA_MATH__HALF_MISC
|
527
596
|
* \brief Convert a half to a signed short integer in round-down mode.
|
528
|
-
*
|
597
|
+
*
|
529
598
|
* \details Convert the half-precision floating-point value \p h to a signed short
|
530
|
-
* integer in round-down mode.
|
531
|
-
* \param[in] h - half. Is only being read.
|
532
|
-
*
|
599
|
+
* integer in round-down mode. NaN inputs are converted to 0.
|
600
|
+
* \param[in] h - half. Is only being read.
|
601
|
+
*
|
533
602
|
* \returns short int
|
534
|
-
* \
|
603
|
+
* - \p h converted to a signed short integer.
|
535
604
|
* \internal
|
536
605
|
* \exception-guarantee no-throw guarantee
|
537
606
|
* \behavior reentrant, thread safe
|
@@ -541,13 +610,13 @@ __CUDA_FP16_DECL__ short int __half2short_rd(const __half h);
|
|
541
610
|
/**
|
542
611
|
* \ingroup CUDA_MATH__HALF_MISC
|
543
612
|
* \brief Convert a half to a signed short integer in round-up mode.
|
544
|
-
*
|
613
|
+
*
|
545
614
|
* \details Convert the half-precision floating-point value \p h to a signed short
|
546
|
-
* integer in round-up mode.
|
547
|
-
* \param[in] h - half. Is only being read.
|
548
|
-
*
|
615
|
+
* integer in round-up mode. NaN inputs are converted to 0.
|
616
|
+
* \param[in] h - half. Is only being read.
|
617
|
+
*
|
549
618
|
* \returns short int
|
550
|
-
* \
|
619
|
+
* - \p h converted to a signed short integer.
|
551
620
|
* \internal
|
552
621
|
* \exception-guarantee no-throw guarantee
|
553
622
|
* \behavior reentrant, thread safe
|
@@ -559,13 +628,13 @@ __CUDA_FP16_DECL__ short int __half2short_ru(const __half h);
|
|
559
628
|
* \ingroup CUDA_MATH__HALF_MISC
|
560
629
|
* \brief Convert a signed short integer to a half in round-to-nearest-even
|
561
630
|
* mode.
|
562
|
-
*
|
631
|
+
*
|
563
632
|
* \details Convert the signed short integer value \p i to a half-precision floating-point
|
564
633
|
* value in round-to-nearest-even mode.
|
565
|
-
* \param[in] i - short int. Is only being read.
|
566
|
-
*
|
634
|
+
* \param[in] i - short int. Is only being read.
|
635
|
+
*
|
567
636
|
* \returns half
|
568
|
-
* \
|
637
|
+
* - \p i converted to half.
|
569
638
|
* \internal
|
570
639
|
* \exception-guarantee no-throw guarantee
|
571
640
|
* \behavior reentrant, thread safe
|
@@ -575,13 +644,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __short2half_rn(const short int i);
|
|
575
644
|
/**
|
576
645
|
* \ingroup CUDA_MATH__HALF_MISC
|
577
646
|
* \brief Convert a signed short integer to a half in round-towards-zero mode.
|
578
|
-
*
|
647
|
+
*
|
579
648
|
* \details Convert the signed short integer value \p i to a half-precision floating-point
|
580
649
|
* value in round-towards-zero mode.
|
581
|
-
* \param[in] i - short int. Is only being read.
|
582
|
-
*
|
650
|
+
* \param[in] i - short int. Is only being read.
|
651
|
+
*
|
583
652
|
* \returns half
|
584
|
-
* \
|
653
|
+
* - \p i converted to half.
|
585
654
|
* \internal
|
586
655
|
* \exception-guarantee no-throw guarantee
|
587
656
|
* \behavior reentrant, thread safe
|
@@ -591,13 +660,13 @@ __CUDA_FP16_DECL__ __half __short2half_rz(const short int i);
|
|
591
660
|
/**
|
592
661
|
* \ingroup CUDA_MATH__HALF_MISC
|
593
662
|
* \brief Convert a signed short integer to a half in round-down mode.
|
594
|
-
*
|
663
|
+
*
|
595
664
|
* \details Convert the signed short integer value \p i to a half-precision floating-point
|
596
665
|
* value in round-down mode.
|
597
|
-
* \param[in] i - short int. Is only being read.
|
598
|
-
*
|
666
|
+
* \param[in] i - short int. Is only being read.
|
667
|
+
*
|
599
668
|
* \returns half
|
600
|
-
* \
|
669
|
+
* - \p i converted to half.
|
601
670
|
* \internal
|
602
671
|
* \exception-guarantee no-throw guarantee
|
603
672
|
* \behavior reentrant, thread safe
|
@@ -607,13 +676,13 @@ __CUDA_FP16_DECL__ __half __short2half_rd(const short int i);
|
|
607
676
|
/**
|
608
677
|
* \ingroup CUDA_MATH__HALF_MISC
|
609
678
|
* \brief Convert a signed short integer to a half in round-up mode.
|
610
|
-
*
|
679
|
+
*
|
611
680
|
* \details Convert the signed short integer value \p i to a half-precision floating-point
|
612
681
|
* value in round-up mode.
|
613
|
-
* \param[in] i - short int. Is only being read.
|
614
|
-
*
|
682
|
+
* \param[in] i - short int. Is only being read.
|
683
|
+
*
|
615
684
|
* \returns half
|
616
|
-
* \
|
685
|
+
* - \p i converted to half.
|
617
686
|
* \internal
|
618
687
|
* \exception-guarantee no-throw guarantee
|
619
688
|
* \behavior reentrant, thread safe
|
@@ -624,13 +693,13 @@ __CUDA_FP16_DECL__ __half __short2half_ru(const short int i);
|
|
624
693
|
/**
|
625
694
|
* \ingroup CUDA_MATH__HALF_MISC
|
626
695
|
* \brief Convert a half to an unsigned integer in round-to-nearest-even mode.
|
627
|
-
*
|
696
|
+
*
|
628
697
|
* \details Convert the half-precision floating-point value \p h to an unsigned integer
|
629
|
-
* in round-to-nearest-even mode.
|
630
|
-
* \param[in] h - half. Is only being read.
|
631
|
-
*
|
698
|
+
* in round-to-nearest-even mode. NaN inputs are converted to 0.
|
699
|
+
* \param[in] h - half. Is only being read.
|
700
|
+
*
|
632
701
|
* \returns unsigned int
|
633
|
-
* \
|
702
|
+
* - \p h converted to an unsigned integer.
|
634
703
|
* \internal
|
635
704
|
* \exception-guarantee no-throw guarantee
|
636
705
|
* \behavior reentrant, thread safe
|
@@ -639,30 +708,14 @@ __CUDA_FP16_DECL__ __half __short2half_ru(const short int i);
|
|
639
708
|
__CUDA_FP16_DECL__ unsigned int __half2uint_rn(const __half h);
|
640
709
|
/**
|
641
710
|
* \ingroup CUDA_MATH__HALF_MISC
|
642
|
-
* \brief Convert a half to an unsigned integer in round-towards-zero mode.
|
643
|
-
*
|
644
|
-
* \details Convert the half-precision floating-point value \p h to an unsigned integer
|
645
|
-
* in round-towards-zero mode.
|
646
|
-
* \param[in] h - half. Is only being read.
|
647
|
-
*
|
648
|
-
* \returns unsigned int
|
649
|
-
* \retval h converted to an unsigned integer.
|
650
|
-
* \internal
|
651
|
-
* \exception-guarantee no-throw guarantee
|
652
|
-
* \behavior reentrant, thread safe
|
653
|
-
* \endinternal
|
654
|
-
*/
|
655
|
-
__CUDA_HOSTDEVICE_FP16_DECL__ unsigned int __half2uint_rz(const __half h);
|
656
|
-
/**
|
657
|
-
* \ingroup CUDA_MATH__HALF_MISC
|
658
711
|
* \brief Convert a half to an unsigned integer in round-down mode.
|
659
712
|
*
|
660
713
|
* \details Convert the half-precision floating-point value \p h to an unsigned integer
|
661
|
-
* in round-down mode.
|
662
|
-
* \param[in] h - half. Is only being read.
|
714
|
+
* in round-down mode. NaN inputs are converted to 0.
|
715
|
+
* \param[in] h - half. Is only being read.
|
663
716
|
*
|
664
717
|
* \returns unsigned int
|
665
|
-
* \
|
718
|
+
* - \p h converted to an unsigned integer.
|
666
719
|
* \internal
|
667
720
|
* \exception-guarantee no-throw guarantee
|
668
721
|
* \behavior reentrant, thread safe
|
@@ -674,11 +727,11 @@ __CUDA_FP16_DECL__ unsigned int __half2uint_rd(const __half h);
|
|
674
727
|
* \brief Convert a half to an unsigned integer in round-up mode.
|
675
728
|
*
|
676
729
|
* \details Convert the half-precision floating-point value \p h to an unsigned integer
|
677
|
-
* in round-up mode.
|
678
|
-
* \param[in] h - half. Is only being read.
|
730
|
+
* in round-up mode. NaN inputs are converted to 0.
|
731
|
+
* \param[in] h - half. Is only being read.
|
679
732
|
*
|
680
733
|
* \returns unsigned int
|
681
|
-
* \
|
734
|
+
* - \p h converted to an unsigned integer.
|
682
735
|
* \internal
|
683
736
|
* \exception-guarantee no-throw guarantee
|
684
737
|
* \behavior reentrant, thread safe
|
@@ -689,13 +742,13 @@ __CUDA_FP16_DECL__ unsigned int __half2uint_ru(const __half h);
|
|
689
742
|
/**
|
690
743
|
* \ingroup CUDA_MATH__HALF_MISC
|
691
744
|
* \brief Convert an unsigned integer to a half in round-to-nearest-even mode.
|
692
|
-
*
|
745
|
+
*
|
693
746
|
* \details Convert the unsigned integer value \p i to a half-precision floating-point
|
694
747
|
* value in round-to-nearest-even mode.
|
695
|
-
* \param[in] i - unsigned int. Is only being read.
|
696
|
-
*
|
748
|
+
* \param[in] i - unsigned int. Is only being read.
|
749
|
+
*
|
697
750
|
* \returns half
|
698
|
-
* \
|
751
|
+
* - \p i converted to half.
|
699
752
|
* \internal
|
700
753
|
* \exception-guarantee no-throw guarantee
|
701
754
|
* \behavior reentrant, thread safe
|
@@ -705,13 +758,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __uint2half_rn(const unsigned int i);
|
|
705
758
|
/**
|
706
759
|
* \ingroup CUDA_MATH__HALF_MISC
|
707
760
|
* \brief Convert an unsigned integer to a half in round-towards-zero mode.
|
708
|
-
*
|
761
|
+
*
|
709
762
|
* \details Convert the unsigned integer value \p i to a half-precision floating-point
|
710
763
|
* value in round-towards-zero mode.
|
711
|
-
* \param[in] i - unsigned int. Is only being read.
|
712
|
-
*
|
764
|
+
* \param[in] i - unsigned int. Is only being read.
|
765
|
+
*
|
713
766
|
* \returns half
|
714
|
-
* \
|
767
|
+
* - \p i converted to half.
|
715
768
|
* \internal
|
716
769
|
* \exception-guarantee no-throw guarantee
|
717
770
|
* \behavior reentrant, thread safe
|
@@ -721,13 +774,13 @@ __CUDA_FP16_DECL__ __half __uint2half_rz(const unsigned int i);
|
|
721
774
|
/**
|
722
775
|
* \ingroup CUDA_MATH__HALF_MISC
|
723
776
|
* \brief Convert an unsigned integer to a half in round-down mode.
|
724
|
-
*
|
777
|
+
*
|
725
778
|
* \details Convert the unsigned integer value \p i to a half-precision floating-point
|
726
779
|
* value in round-down mode.
|
727
|
-
* \param[in] i - unsigned int. Is only being read.
|
728
|
-
*
|
780
|
+
* \param[in] i - unsigned int. Is only being read.
|
781
|
+
*
|
729
782
|
* \returns half
|
730
|
-
* \
|
783
|
+
* - \p i converted to half.
|
731
784
|
* \internal
|
732
785
|
* \exception-guarantee no-throw guarantee
|
733
786
|
* \behavior reentrant, thread safe
|
@@ -737,13 +790,13 @@ __CUDA_FP16_DECL__ __half __uint2half_rd(const unsigned int i);
|
|
737
790
|
/**
|
738
791
|
* \ingroup CUDA_MATH__HALF_MISC
|
739
792
|
* \brief Convert an unsigned integer to a half in round-up mode.
|
740
|
-
*
|
793
|
+
*
|
741
794
|
* \details Convert the unsigned integer value \p i to a half-precision floating-point
|
742
795
|
* value in round-up mode.
|
743
|
-
* \param[in] i - unsigned int. Is only being read.
|
744
|
-
*
|
796
|
+
* \param[in] i - unsigned int. Is only being read.
|
797
|
+
*
|
745
798
|
* \returns half
|
746
|
-
* \
|
799
|
+
* - \p i converted to half.
|
747
800
|
* \internal
|
748
801
|
* \exception-guarantee no-throw guarantee
|
749
802
|
* \behavior reentrant, thread safe
|
@@ -755,13 +808,13 @@ __CUDA_FP16_DECL__ __half __uint2half_ru(const unsigned int i);
|
|
755
808
|
* \ingroup CUDA_MATH__HALF_MISC
|
756
809
|
* \brief Convert a half to an unsigned short integer in round-to-nearest-even
|
757
810
|
* mode.
|
758
|
-
*
|
811
|
+
*
|
759
812
|
* \details Convert the half-precision floating-point value \p h to an unsigned short
|
760
|
-
* integer in round-to-nearest-even mode.
|
761
|
-
* \param[in] h - half. Is only being read.
|
762
|
-
*
|
813
|
+
* integer in round-to-nearest-even mode. NaN inputs are converted to 0.
|
814
|
+
* \param[in] h - half. Is only being read.
|
815
|
+
*
|
763
816
|
* \returns unsigned short int
|
764
|
-
* \
|
817
|
+
* - \p h converted to an unsigned short integer.
|
765
818
|
* \internal
|
766
819
|
* \exception-guarantee no-throw guarantee
|
767
820
|
* \behavior reentrant, thread safe
|
@@ -770,43 +823,26 @@ __CUDA_FP16_DECL__ __half __uint2half_ru(const unsigned int i);
|
|
770
823
|
__CUDA_FP16_DECL__ unsigned short int __half2ushort_rn(const __half h);
|
771
824
|
/**
|
772
825
|
* \ingroup CUDA_MATH__HALF_MISC
|
773
|
-
* \brief Convert a half to an unsigned short integer in round-towards-zero
|
774
|
-
* mode.
|
775
|
-
*
|
776
|
-
* \details Convert the half-precision floating-point value \p h to an unsigned short
|
777
|
-
* integer in round-towards-zero mode.
|
778
|
-
* \param[in] h - half. Is only being read.
|
779
|
-
*
|
780
|
-
* \returns unsigned short int
|
781
|
-
* \retval h converted to an unsigned short integer.
|
782
|
-
* \internal
|
783
|
-
* \exception-guarantee no-throw guarantee
|
784
|
-
* \behavior reentrant, thread safe
|
785
|
-
* \endinternal
|
786
|
-
*/
|
787
|
-
__CUDA_HOSTDEVICE_FP16_DECL__ unsigned short int __half2ushort_rz(const __half h);
|
788
|
-
/**
|
789
|
-
* \ingroup CUDA_MATH__HALF_MISC
|
790
826
|
* \brief Convert a half to an unsigned short integer in round-down mode.
|
791
|
-
*
|
827
|
+
*
|
792
828
|
* \details Convert the half-precision floating-point value \p h to an unsigned short
|
793
|
-
* integer in round-down mode.
|
794
|
-
* \param[in] h - half. Is only being read.
|
795
|
-
*
|
829
|
+
* integer in round-down mode. NaN inputs are converted to 0.
|
830
|
+
* \param[in] h - half. Is only being read.
|
831
|
+
*
|
796
832
|
* \returns unsigned short int
|
797
|
-
* \
|
833
|
+
* - \p h converted to an unsigned short integer.
|
798
834
|
*/
|
799
835
|
__CUDA_FP16_DECL__ unsigned short int __half2ushort_rd(const __half h);
|
800
836
|
/**
|
801
837
|
* \ingroup CUDA_MATH__HALF_MISC
|
802
838
|
* \brief Convert a half to an unsigned short integer in round-up mode.
|
803
|
-
*
|
839
|
+
*
|
804
840
|
* \details Convert the half-precision floating-point value \p h to an unsigned short
|
805
|
-
* integer in round-up mode.
|
806
|
-
* \param[in] h - half. Is only being read.
|
807
|
-
*
|
841
|
+
* integer in round-up mode. NaN inputs are converted to 0.
|
842
|
+
* \param[in] h - half. Is only being read.
|
843
|
+
*
|
808
844
|
* \returns unsigned short int
|
809
|
-
* \
|
845
|
+
* - \p h converted to an unsigned short integer.
|
810
846
|
*/
|
811
847
|
__CUDA_FP16_DECL__ unsigned short int __half2ushort_ru(const __half h);
|
812
848
|
|
@@ -814,13 +850,13 @@ __CUDA_FP16_DECL__ unsigned short int __half2ushort_ru(const __half h);
|
|
814
850
|
* \ingroup CUDA_MATH__HALF_MISC
|
815
851
|
* \brief Convert an unsigned short integer to a half in round-to-nearest-even
|
816
852
|
* mode.
|
817
|
-
*
|
853
|
+
*
|
818
854
|
* \details Convert the unsigned short integer value \p i to a half-precision floating-point
|
819
855
|
* value in round-to-nearest-even mode.
|
820
|
-
* \param[in] i - unsigned short int. Is only being read.
|
821
|
-
*
|
856
|
+
* \param[in] i - unsigned short int. Is only being read.
|
857
|
+
*
|
822
858
|
* \returns half
|
823
|
-
* \
|
859
|
+
* - \p i converted to half.
|
824
860
|
* \internal
|
825
861
|
* \exception-guarantee no-throw guarantee
|
826
862
|
* \behavior reentrant, thread safe
|
@@ -831,13 +867,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __ushort2half_rn(const unsigned short int i
|
|
831
867
|
* \ingroup CUDA_MATH__HALF_MISC
|
832
868
|
* \brief Convert an unsigned short integer to a half in round-towards-zero
|
833
869
|
* mode.
|
834
|
-
*
|
870
|
+
*
|
835
871
|
* \details Convert the unsigned short integer value \p i to a half-precision floating-point
|
836
872
|
* value in round-towards-zero mode.
|
837
|
-
* \param[in] i - unsigned short int. Is only being read.
|
838
|
-
*
|
873
|
+
* \param[in] i - unsigned short int. Is only being read.
|
874
|
+
*
|
839
875
|
* \returns half
|
840
|
-
* \
|
876
|
+
* - \p i converted to half.
|
841
877
|
* \internal
|
842
878
|
* \exception-guarantee no-throw guarantee
|
843
879
|
* \behavior reentrant, thread safe
|
@@ -847,13 +883,13 @@ __CUDA_FP16_DECL__ __half __ushort2half_rz(const unsigned short int i);
|
|
847
883
|
/**
|
848
884
|
* \ingroup CUDA_MATH__HALF_MISC
|
849
885
|
* \brief Convert an unsigned short integer to a half in round-down mode.
|
850
|
-
*
|
886
|
+
*
|
851
887
|
* \details Convert the unsigned short integer value \p i to a half-precision floating-point
|
852
888
|
* value in round-down mode.
|
853
|
-
* \param[in] i - unsigned short int. Is only being read.
|
854
|
-
*
|
889
|
+
* \param[in] i - unsigned short int. Is only being read.
|
890
|
+
*
|
855
891
|
* \returns half
|
856
|
-
* \
|
892
|
+
* - \p i converted to half.
|
857
893
|
* \internal
|
858
894
|
* \exception-guarantee no-throw guarantee
|
859
895
|
* \behavior reentrant, thread safe
|
@@ -863,13 +899,13 @@ __CUDA_FP16_DECL__ __half __ushort2half_rd(const unsigned short int i);
|
|
863
899
|
/**
|
864
900
|
* \ingroup CUDA_MATH__HALF_MISC
|
865
901
|
* \brief Convert an unsigned short integer to a half in round-up mode.
|
866
|
-
*
|
902
|
+
*
|
867
903
|
* \details Convert the unsigned short integer value \p i to a half-precision floating-point
|
868
904
|
* value in round-up mode.
|
869
|
-
* \param[in] i - unsigned short int. Is only being read.
|
870
|
-
*
|
905
|
+
* \param[in] i - unsigned short int. Is only being read.
|
906
|
+
*
|
871
907
|
* \returns half
|
872
|
-
* \
|
908
|
+
* - \p i converted to half.
|
873
909
|
* \internal
|
874
910
|
* \exception-guarantee no-throw guarantee
|
875
911
|
* \behavior reentrant, thread safe
|
@@ -881,13 +917,13 @@ __CUDA_FP16_DECL__ __half __ushort2half_ru(const unsigned short int i);
|
|
881
917
|
* \ingroup CUDA_MATH__HALF_MISC
|
882
918
|
* \brief Convert a half to an unsigned 64-bit integer in round-to-nearest-even
|
883
919
|
* mode.
|
884
|
-
*
|
920
|
+
*
|
885
921
|
* \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
|
886
|
-
* integer in round-to-nearest-even mode.
|
887
|
-
* \param[in] h - half. Is only being read.
|
888
|
-
*
|
922
|
+
* integer in round-to-nearest-even mode. NaN inputs return 0x8000000000000000.
|
923
|
+
* \param[in] h - half. Is only being read.
|
924
|
+
*
|
889
925
|
* \returns unsigned long long int
|
890
|
-
* \
|
926
|
+
* - \p h converted to an unsigned 64-bit integer.
|
891
927
|
* \internal
|
892
928
|
* \exception-guarantee no-throw guarantee
|
893
929
|
* \behavior reentrant, thread safe
|
@@ -896,31 +932,14 @@ __CUDA_FP16_DECL__ __half __ushort2half_ru(const unsigned short int i);
|
|
896
932
|
__CUDA_FP16_DECL__ unsigned long long int __half2ull_rn(const __half h);
|
897
933
|
/**
|
898
934
|
* \ingroup CUDA_MATH__HALF_MISC
|
899
|
-
* \brief Convert a half to an unsigned 64-bit integer in round-towards-zero
|
900
|
-
* mode.
|
901
|
-
*
|
902
|
-
* \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
|
903
|
-
* integer in round-towards-zero mode.
|
904
|
-
* \param[in] h - half. Is only being read.
|
905
|
-
*
|
906
|
-
* \returns unsigned long long int
|
907
|
-
* \retval h converted to an unsigned 64-bit integer.
|
908
|
-
* \internal
|
909
|
-
* \exception-guarantee no-throw guarantee
|
910
|
-
* \behavior reentrant, thread safe
|
911
|
-
* \endinternal
|
912
|
-
*/
|
913
|
-
__CUDA_HOSTDEVICE_FP16_DECL__ unsigned long long int __half2ull_rz(const __half h);
|
914
|
-
/**
|
915
|
-
* \ingroup CUDA_MATH__HALF_MISC
|
916
935
|
* \brief Convert a half to an unsigned 64-bit integer in round-down mode.
|
917
|
-
*
|
936
|
+
*
|
918
937
|
* \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
|
919
|
-
* integer in round-down mode.
|
920
|
-
* \param[in] h - half. Is only being read.
|
921
|
-
*
|
938
|
+
* integer in round-down mode. NaN inputs return 0x8000000000000000.
|
939
|
+
* \param[in] h - half. Is only being read.
|
940
|
+
*
|
922
941
|
* \returns unsigned long long int
|
923
|
-
* \
|
942
|
+
* - \p h converted to an unsigned 64-bit integer.
|
924
943
|
* \internal
|
925
944
|
* \exception-guarantee no-throw guarantee
|
926
945
|
* \behavior reentrant, thread safe
|
@@ -930,13 +949,13 @@ __CUDA_FP16_DECL__ unsigned long long int __half2ull_rd(const __half h);
|
|
930
949
|
/**
|
931
950
|
* \ingroup CUDA_MATH__HALF_MISC
|
932
951
|
* \brief Convert a half to an unsigned 64-bit integer in round-up mode.
|
933
|
-
*
|
952
|
+
*
|
934
953
|
* \details Convert the half-precision floating-point value \p h to an unsigned 64-bit
|
935
|
-
* integer in round-up mode.
|
936
|
-
* \param[in] h - half. Is only being read.
|
937
|
-
*
|
954
|
+
* integer in round-up mode. NaN inputs return 0x8000000000000000.
|
955
|
+
* \param[in] h - half. Is only being read.
|
956
|
+
*
|
938
957
|
* \returns unsigned long long int
|
939
|
-
* \
|
958
|
+
* - \p h converted to an unsigned 64-bit integer.
|
940
959
|
* \internal
|
941
960
|
* \exception-guarantee no-throw guarantee
|
942
961
|
* \behavior reentrant, thread safe
|
@@ -948,13 +967,13 @@ __CUDA_FP16_DECL__ unsigned long long int __half2ull_ru(const __half h);
|
|
948
967
|
* \ingroup CUDA_MATH__HALF_MISC
|
949
968
|
* \brief Convert an unsigned 64-bit integer to a half in round-to-nearest-even
|
950
969
|
* mode.
|
951
|
-
*
|
970
|
+
*
|
952
971
|
* \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
|
953
972
|
* value in round-to-nearest-even mode.
|
954
|
-
* \param[in] i - unsigned long long int. Is only being read.
|
955
|
-
*
|
973
|
+
* \param[in] i - unsigned long long int. Is only being read.
|
974
|
+
*
|
956
975
|
* \returns half
|
957
|
-
* \
|
976
|
+
* - \p i converted to half.
|
958
977
|
* \internal
|
959
978
|
* \exception-guarantee no-throw guarantee
|
960
979
|
* \behavior reentrant, thread safe
|
@@ -965,13 +984,13 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __ull2half_rn(const unsigned long long int
|
|
965
984
|
* \ingroup CUDA_MATH__HALF_MISC
|
966
985
|
* \brief Convert an unsigned 64-bit integer to a half in round-towards-zero
|
967
986
|
* mode.
|
968
|
-
*
|
987
|
+
*
|
969
988
|
* \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
|
970
989
|
* value in round-towards-zero mode.
|
971
|
-
* \param[in] i - unsigned long long int. Is only being read.
|
972
|
-
*
|
990
|
+
* \param[in] i - unsigned long long int. Is only being read.
|
991
|
+
*
|
973
992
|
* \returns half
|
974
|
-
* \
|
993
|
+
* - \p i converted to half.
|
975
994
|
* \internal
|
976
995
|
* \exception-guarantee no-throw guarantee
|
977
996
|
* \behavior reentrant, thread safe
|
@@ -981,13 +1000,13 @@ __CUDA_FP16_DECL__ __half __ull2half_rz(const unsigned long long int i);
|
|
981
1000
|
/**
|
982
1001
|
* \ingroup CUDA_MATH__HALF_MISC
|
983
1002
|
* \brief Convert an unsigned 64-bit integer to a half in round-down mode.
|
984
|
-
*
|
1003
|
+
*
|
985
1004
|
* \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
|
986
1005
|
* value in round-down mode.
|
987
|
-
* \param[in] i - unsigned long long int. Is only being read.
|
988
|
-
*
|
1006
|
+
* \param[in] i - unsigned long long int. Is only being read.
|
1007
|
+
*
|
989
1008
|
* \returns half
|
990
|
-
* \
|
1009
|
+
* - \p i converted to half.
|
991
1010
|
* \internal
|
992
1011
|
* \exception-guarantee no-throw guarantee
|
993
1012
|
* \behavior reentrant, thread safe
|
@@ -997,13 +1016,13 @@ __CUDA_FP16_DECL__ __half __ull2half_rd(const unsigned long long int i);
|
|
997
1016
|
/**
|
998
1017
|
* \ingroup CUDA_MATH__HALF_MISC
|
999
1018
|
* \brief Convert an unsigned 64-bit integer to a half in round-up mode.
|
1000
|
-
*
|
1019
|
+
*
|
1001
1020
|
* \details Convert the unsigned 64-bit integer value \p i to a half-precision floating-point
|
1002
1021
|
* value in round-up mode.
|
1003
|
-
* \param[in] i - unsigned long long int. Is only being read.
|
1004
|
-
*
|
1022
|
+
* \param[in] i - unsigned long long int. Is only being read.
|
1023
|
+
*
|
1005
1024
|
* \returns half
|
1006
|
-
* \
|
1025
|
+
* - \p i converted to half.
|
1007
1026
|
* \internal
|
1008
1027
|
* \exception-guarantee no-throw guarantee
|
1009
1028
|
* \behavior reentrant, thread safe
|
@@ -1015,13 +1034,13 @@ __CUDA_FP16_DECL__ __half __ull2half_ru(const unsigned long long int i);
|
|
1015
1034
|
* \ingroup CUDA_MATH__HALF_MISC
|
1016
1035
|
* \brief Convert a half to a signed 64-bit integer in round-to-nearest-even
|
1017
1036
|
* mode.
|
1018
|
-
*
|
1037
|
+
*
|
1019
1038
|
* \details Convert the half-precision floating-point value \p h to a signed 64-bit
|
1020
|
-
* integer in round-to-nearest-even mode.
|
1021
|
-
* \param[in] h - half. Is only being read.
|
1022
|
-
*
|
1039
|
+
* integer in round-to-nearest-even mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
|
1040
|
+
* \param[in] h - half. Is only being read.
|
1041
|
+
*
|
1023
1042
|
* \returns long long int
|
1024
|
-
* \
|
1043
|
+
* - \p h converted to a signed 64-bit integer.
|
1025
1044
|
* \internal
|
1026
1045
|
* \exception-guarantee no-throw guarantee
|
1027
1046
|
* \behavior reentrant, thread safe
|
@@ -1030,30 +1049,14 @@ __CUDA_FP16_DECL__ __half __ull2half_ru(const unsigned long long int i);
|
|
1030
1049
|
__CUDA_FP16_DECL__ long long int __half2ll_rn(const __half h);
|
1031
1050
|
/**
|
1032
1051
|
* \ingroup CUDA_MATH__HALF_MISC
|
1033
|
-
* \brief Convert a half to a signed 64-bit integer in round-towards-zero mode.
|
1034
|
-
*
|
1035
|
-
* \details Convert the half-precision floating-point value \p h to a signed 64-bit
|
1036
|
-
* integer in round-towards-zero mode.
|
1037
|
-
* \param[in] h - half. Is only being read.
|
1038
|
-
*
|
1039
|
-
* \returns long long int
|
1040
|
-
* \retval h converted to a signed 64-bit integer.
|
1041
|
-
* \internal
|
1042
|
-
* \exception-guarantee no-throw guarantee
|
1043
|
-
* \behavior reentrant, thread safe
|
1044
|
-
* \endinternal
|
1045
|
-
*/
|
1046
|
-
__CUDA_HOSTDEVICE_FP16_DECL__ long long int __half2ll_rz(const __half h);
|
1047
|
-
/**
|
1048
|
-
* \ingroup CUDA_MATH__HALF_MISC
|
1049
1052
|
* \brief Convert a half to a signed 64-bit integer in round-down mode.
|
1050
|
-
*
|
1053
|
+
*
|
1051
1054
|
* \details Convert the half-precision floating-point value \p h to a signed 64-bit
|
1052
|
-
* integer in round-down mode.
|
1053
|
-
* \param[in] h - half. Is only being read.
|
1054
|
-
*
|
1055
|
+
* integer in round-down mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
|
1056
|
+
* \param[in] h - half. Is only being read.
|
1057
|
+
*
|
1055
1058
|
* \returns long long int
|
1056
|
-
* \
|
1059
|
+
* - \p h converted to a signed 64-bit integer.
|
1057
1060
|
* \internal
|
1058
1061
|
* \exception-guarantee no-throw guarantee
|
1059
1062
|
* \behavior reentrant, thread safe
|
@@ -1063,13 +1066,13 @@ __CUDA_FP16_DECL__ long long int __half2ll_rd(const __half h);
|
|
1063
1066
|
/**
|
1064
1067
|
* \ingroup CUDA_MATH__HALF_MISC
|
1065
1068
|
* \brief Convert a half to a signed 64-bit integer in round-up mode.
|
1066
|
-
*
|
1069
|
+
*
|
1067
1070
|
* \details Convert the half-precision floating-point value \p h to a signed 64-bit
|
1068
|
-
* integer in round-up mode.
|
1069
|
-
* \param[in] h - half. Is only being read.
|
1070
|
-
*
|
1071
|
+
* integer in round-up mode. NaN inputs return a long long int with hex value of 0x8000000000000000.
|
1072
|
+
* \param[in] h - half. Is only being read.
|
1073
|
+
*
|
1071
1074
|
* \returns long long int
|
1072
|
-
* \
|
1075
|
+
* - \p h converted to a signed 64-bit integer.
|
1073
1076
|
* \internal
|
1074
1077
|
* \exception-guarantee no-throw guarantee
|
1075
1078
|
* \behavior reentrant, thread safe
|
@@ -1081,13 +1084,13 @@ __CUDA_FP16_DECL__ long long int __half2ll_ru(const __half h);
|
|
1081
1084
|
* \ingroup CUDA_MATH__HALF_MISC
|
1082
1085
|
* \brief Convert a signed 64-bit integer to a half in round-to-nearest-even
|
1083
1086
|
* mode.
|
1084
|
-
*
|
1087
|
+
*
|
1085
1088
|
* \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
|
1086
1089
|
* value in round-to-nearest-even mode.
|
1087
|
-
* \param[in] i - long long int. Is only being read.
|
1088
|
-
*
|
1090
|
+
* \param[in] i - long long int. Is only being read.
|
1091
|
+
*
|
1089
1092
|
* \returns half
|
1090
|
-
* \
|
1093
|
+
* - \p i converted to half.
|
1091
1094
|
* \internal
|
1092
1095
|
* \exception-guarantee no-throw guarantee
|
1093
1096
|
* \behavior reentrant, thread safe
|
@@ -1097,25 +1100,25 @@ __CUDA_HOSTDEVICE_FP16_DECL__ __half __ll2half_rn(const long long int i);
|
|
1097
1100
|
/**
|
1098
1101
|
* \ingroup CUDA_MATH__HALF_MISC
|
1099
1102
|
* \brief Convert a signed 64-bit integer to a half in round-towards-zero mode.
|
1100
|
-
*
|
1103
|
+
*
|
1101
1104
|
* \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
|
1102
1105
|
* value in round-towards-zero mode.
|
1103
|
-
* \param[in] i - long long int. Is only being read.
|
1104
|
-
*
|
1106
|
+
* \param[in] i - long long int. Is only being read.
|
1107
|
+
*
|
1105
1108
|
* \returns half
|
1106
|
-
* \
|
1109
|
+
* - \p i converted to half.
|
1107
1110
|
*/
|
1108
1111
|
__CUDA_FP16_DECL__ __half __ll2half_rz(const long long int i);
|
1109
1112
|
/**
|
1110
1113
|
* \ingroup CUDA_MATH__HALF_MISC
|
1111
1114
|
* \brief Convert a signed 64-bit integer to a half in round-down mode.
|
1112
|
-
*
|
1115
|
+
*
|
1113
1116
|
* \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
|
1114
1117
|
* value in round-down mode.
|
1115
|
-
* \param[in] i - long long int. Is only being read.
|
1116
|
-
*
|
1118
|
+
* \param[in] i - long long int. Is only being read.
|
1119
|
+
*
|
1117
1120
|
* \returns half
|
1118
|
-
* \
|
1121
|
+
* - \p i converted to half.
|
1119
1122
|
* \internal
|
1120
1123
|
* \exception-guarantee no-throw guarantee
|
1121
1124
|
* \behavior reentrant, thread safe
|
@@ -1125,13 +1128,13 @@ __CUDA_FP16_DECL__ __half __ll2half_rd(const long long int i);
|
|
1125
1128
|
/**
|
1126
1129
|
* \ingroup CUDA_MATH__HALF_MISC
|
1127
1130
|
* \brief Convert a signed 64-bit integer to a half in round-up mode.
|
1128
|
-
*
|
1131
|
+
*
|
1129
1132
|
* \details Convert the signed 64-bit integer value \p i to a half-precision floating-point
|
1130
1133
|
* value in round-up mode.
|
1131
|
-
* \param[in] i - long long int. Is only being read.
|
1132
|
-
*
|
1134
|
+
* \param[in] i - long long int. Is only being read.
|
1135
|
+
*
|
1133
1136
|
* \returns half
|
1134
|
-
* \
|
1137
|
+
* - \p i converted to half.
|
1135
1138
|
* \internal
|
1136
1139
|
* \exception-guarantee no-throw guarantee
|
1137
1140
|
* \behavior reentrant, thread safe
|
@@ -1142,13 +1145,13 @@ __CUDA_FP16_DECL__ __half __ll2half_ru(const long long int i);
|
|
1142
1145
|
/**
|
1143
1146
|
* \ingroup CUDA_MATH__HALF_FUNCTIONS
|
1144
1147
|
* \brief Truncate input argument to the integral part.
|
1145
|
-
*
|
1148
|
+
*
|
1146
1149
|
* \details Round \p h to the nearest integer value that does not exceed \p h in
|
1147
1150
|
* magnitude.
|
1148
|
-
* \param[in] h - half. Is only being read.
|
1149
|
-
*
|
1151
|
+
* \param[in] h - half. Is only being read.
|
1152
|
+
*
|
1150
1153
|
* \returns half
|
1151
|
-
*
|
1154
|
+
* - The truncated integer value.
|
1152
1155
|
* \internal
|
1153
1156
|
* \exception-guarantee no-throw guarantee
|
1154
1157
|
* \behavior reentrant, thread safe
|
@@ -1158,12 +1161,12 @@ __CUDA_FP16_DECL__ __half htrunc(const __half h);
|
|
1158
1161
|
/**
|
1159
1162
|
* \ingroup CUDA_MATH__HALF_FUNCTIONS
|
1160
1163
|
* \brief Calculate ceiling of the input argument.
|
1161
|
-
*
|
1164
|
+
*
|
1162
1165
|
* \details Compute the smallest integer value not less than \p h.
|
1163
|
-
* \param[in] h - half. Is only being read.
|
1164
|
-
*
|
1166
|
+
* \param[in] h - half. Is only being read.
|
1167
|
+
*
|
1165
1168
|
* \returns half
|
1166
|
-
*
|
1169
|
+
* - The smallest integer value not less than \p h.
|
1167
1170
|
* \internal
|
1168
1171
|
* \exception-guarantee no-throw guarantee
|
1169
1172
|
* \behavior reentrant, thread safe
|
@@ -1173,12 +1176,12 @@ __CUDA_FP16_DECL__ __half hceil(const __half h);
|
|
1173
1176
|
/**
|
1174
1177
|
* \ingroup CUDA_MATH__HALF_FUNCTIONS
|
1175
1178
|
* \brief Calculate the largest integer less than or equal to \p h.
|
1176
|
-
*
|
1179
|
+
*
|
1177
1180
|
* \details Calculate the largest integer value which is less than or equal to \p h.
|
1178
|
-
* \param[in] h - half. Is only being read.
|
1179
|
-
*
|
1181
|
+
* \param[in] h - half. Is only being read.
|
1182
|
+
*
|
1180
1183
|
* \returns half
|
1181
|
-
*
|
1184
|
+
* - The largest integer value which is less than or equal to \p h.
|
1182
1185
|
* \internal
|
1183
1186
|
* \exception-guarantee no-throw guarantee
|
1184
1187
|
* \behavior reentrant, thread safe
|
@@ -1189,13 +1192,13 @@ __CUDA_FP16_DECL__ __half hfloor(const __half h);
|
|
1189
1192
|
* \ingroup CUDA_MATH__HALF_FUNCTIONS
|
1190
1193
|
* \brief Round input to nearest integer value in half-precision floating-point
|
1191
1194
|
* number.
|
1192
|
-
*
|
1195
|
+
*
|
1193
1196
|
* \details Round \p h to the nearest integer value in half-precision floating-point
|
1194
1197
|
* format, with halfway cases rounded to the nearest even integer value.
|
1195
|
-
* \param[in] h - half. Is only being read.
|
1196
|
-
*
|
1198
|
+
* \param[in] h - half. Is only being read.
|
1199
|
+
*
|
1197
1200
|
* \returns half
|
1198
|
-
*
|
1201
|
+
* - The nearest integer to \p h.
|
1199
1202
|
* \internal
|
1200
1203
|
* \exception-guarantee no-throw guarantee
|
1201
1204
|
* \behavior reentrant, thread safe
|
@@ -1206,13 +1209,13 @@ __CUDA_FP16_DECL__ __half hrint(const __half h);
|
|
1206
1209
|
/**
|
1207
1210
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
1208
1211
|
* \brief Truncate \p half2 vector input argument to the integral part.
|
1209
|
-
*
|
1212
|
+
*
|
1210
1213
|
* \details Round each component of vector \p h to the nearest integer value that does
|
1211
1214
|
* not exceed \p h in magnitude.
|
1212
|
-
* \param[in] h - half2. Is only being read.
|
1213
|
-
*
|
1215
|
+
* \param[in] h - half2. Is only being read.
|
1216
|
+
*
|
1214
1217
|
* \returns half2
|
1215
|
-
*
|
1218
|
+
* - The truncated \p h.
|
1216
1219
|
* \internal
|
1217
1220
|
* \exception-guarantee no-throw guarantee
|
1218
1221
|
* \behavior reentrant, thread safe
|
@@ -1222,13 +1225,13 @@ __CUDA_FP16_DECL__ __half2 h2trunc(const __half2 h);
|
|
1222
1225
|
/**
|
1223
1226
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
1224
1227
|
* \brief Calculate \p half2 vector ceiling of the input argument.
|
1225
|
-
*
|
1228
|
+
*
|
1226
1229
|
* \details For each component of vector \p h compute the smallest integer value not less
|
1227
1230
|
* than \p h.
|
1228
|
-
* \param[in] h - half2. Is only being read.
|
1229
|
-
*
|
1231
|
+
* \param[in] h - half2. Is only being read.
|
1232
|
+
*
|
1230
1233
|
* \returns half2
|
1231
|
-
*
|
1234
|
+
* - The vector of smallest integers not less than \p h.
|
1232
1235
|
* \internal
|
1233
1236
|
* \exception-guarantee no-throw guarantee
|
1234
1237
|
* \behavior reentrant, thread safe
|
@@ -1238,13 +1241,13 @@ __CUDA_FP16_DECL__ __half2 h2ceil(const __half2 h);
|
|
1238
1241
|
/**
|
1239
1242
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
1240
1243
|
* \brief Calculate the largest integer less than or equal to \p h.
|
1241
|
-
*
|
1244
|
+
*
|
1242
1245
|
* \details For each component of vector \p h calculate the largest integer value which
|
1243
1246
|
* is less than or equal to \p h.
|
1244
|
-
* \param[in] h - half2. Is only being read.
|
1245
|
-
*
|
1247
|
+
* \param[in] h - half2. Is only being read.
|
1248
|
+
*
|
1246
1249
|
* \returns half2
|
1247
|
-
*
|
1250
|
+
* - The vector of largest integers which is less than or equal to \p h.
|
1248
1251
|
* \internal
|
1249
1252
|
* \exception-guarantee no-throw guarantee
|
1250
1253
|
* \behavior reentrant, thread safe
|
@@ -1255,14 +1258,14 @@ __CUDA_FP16_DECL__ __half2 h2floor(const __half2 h);
|
|
1255
1258
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
1256
1259
|
* \brief Round input to nearest integer value in half-precision floating-point
|
1257
1260
|
* number.
|
1258
|
-
*
|
1261
|
+
*
|
1259
1262
|
* \details Round each component of \p half2 vector \p h to the nearest integer value in
|
1260
1263
|
* half-precision floating-point format, with halfway cases rounded to the
|
1261
1264
|
* nearest even integer value.
|
1262
|
-
* \param[in] h - half2. Is only being read.
|
1263
|
-
*
|
1265
|
+
* \param[in] h - half2. Is only being read.
|
1266
|
+
*
|
1264
1267
|
* \returns half2
|
1265
|
-
*
|
1268
|
+
* - The vector of rounded integer values.
|
1266
1269
|
* \internal
|
1267
1270
|
* \exception-guarantee no-throw guarantee
|
1268
1271
|
* \behavior reentrant, thread safe
|
@@ -1273,13 +1276,13 @@ __CUDA_FP16_DECL__ __half2 h2rint(const __half2 h);
|
|
1273
1276
|
/**
|
1274
1277
|
* \ingroup CUDA_MATH__HALF_MISC
|
1275
1278
|
* \brief Returns \p half2 with both halves equal to the input value.
|
1276
|
-
*
|
1279
|
+
*
|
1277
1280
|
* \details Returns \p half2 number with both halves equal to the input \p a \p half
|
1278
1281
|
* number.
|
1279
|
-
* \param[in] a - half. Is only being read.
|
1280
|
-
*
|
1282
|
+
* \param[in] a - half. Is only being read.
|
1283
|
+
*
|
1281
1284
|
* \returns half2
|
1282
|
-
*
|
1285
|
+
* - The vector which has both its halves equal to the input \p a.
|
1283
1286
|
* \internal
|
1284
1287
|
* \exception-guarantee no-throw guarantee
|
1285
1288
|
* \behavior reentrant, thread safe
|
@@ -1289,13 +1292,13 @@ __CUDA_FP16_DECL__ __half2 __half2half2(const __half a);
|
|
1289
1292
|
/**
|
1290
1293
|
* \ingroup CUDA_MATH__HALF_MISC
|
1291
1294
|
* \brief Swaps both halves of the \p half2 input.
|
1292
|
-
*
|
1295
|
+
*
|
1293
1296
|
* \details Swaps both halves of the \p half2 input and returns a new \p half2 number
|
1294
1297
|
* with swapped halves.
|
1295
|
-
* \param[in] a - half2. Is only being read.
|
1296
|
-
*
|
1298
|
+
* \param[in] a - half2. Is only being read.
|
1299
|
+
*
|
1297
1300
|
* \returns half2
|
1298
|
-
* \
|
1301
|
+
* - \p a with its halves being swapped.
|
1299
1302
|
* \internal
|
1300
1303
|
* \exception-guarantee no-throw guarantee
|
1301
1304
|
* \behavior reentrant, thread safe
|
@@ -1305,17 +1308,17 @@ __CUDA_FP16_DECL__ __half2 __lowhigh2highlow(const __half2 a);
|
|
1305
1308
|
/**
|
1306
1309
|
* \ingroup CUDA_MATH__HALF_MISC
|
1307
1310
|
* \brief Extracts low 16 bits from each of the two \p half2 inputs and combines
|
1308
|
-
* into one \p half2 number.
|
1309
|
-
*
|
1311
|
+
* into one \p half2 number.
|
1312
|
+
*
|
1310
1313
|
* \details Extracts low 16 bits from each of the two \p half2 inputs and combines into
|
1311
1314
|
* one \p half2 number. Low 16 bits from input \p a is stored in low 16 bits of
|
1312
1315
|
* the return value, low 16 bits from input \p b is stored in high 16 bits of
|
1313
|
-
* the return value.
|
1314
|
-
* \param[in] a - half2. Is only being read.
|
1315
|
-
* \param[in] b - half2. Is only being read.
|
1316
|
-
*
|
1316
|
+
* the return value.
|
1317
|
+
* \param[in] a - half2. Is only being read.
|
1318
|
+
* \param[in] b - half2. Is only being read.
|
1319
|
+
*
|
1317
1320
|
* \returns half2
|
1318
|
-
*
|
1321
|
+
* - The low 16 bits of \p a and of \p b.
|
1319
1322
|
* \internal
|
1320
1323
|
* \exception-guarantee no-throw guarantee
|
1321
1324
|
* \behavior reentrant, thread safe
|
@@ -1326,16 +1329,16 @@ __CUDA_FP16_DECL__ __half2 __lows2half2(const __half2 a, const __half2 b);
|
|
1326
1329
|
* \ingroup CUDA_MATH__HALF_MISC
|
1327
1330
|
* \brief Extracts high 16 bits from each of the two \p half2 inputs and
|
1328
1331
|
* combines into one \p half2 number.
|
1329
|
-
*
|
1332
|
+
*
|
1330
1333
|
* \details Extracts high 16 bits from each of the two \p half2 inputs and combines into
|
1331
1334
|
* one \p half2 number. High 16 bits from input \p a is stored in low 16 bits of
|
1332
1335
|
* the return value, high 16 bits from input \p b is stored in high 16 bits of
|
1333
1336
|
* the return value.
|
1334
|
-
* \param[in] a - half2. Is only being read.
|
1335
|
-
* \param[in] b - half2. Is only being read.
|
1336
|
-
*
|
1337
|
+
* \param[in] a - half2. Is only being read.
|
1338
|
+
* \param[in] b - half2. Is only being read.
|
1339
|
+
*
|
1337
1340
|
* \returns half2
|
1338
|
-
*
|
1341
|
+
* - The high 16 bits of \p a and of \p b.
|
1339
1342
|
* \internal
|
1340
1343
|
* \exception-guarantee no-throw guarantee
|
1341
1344
|
* \behavior reentrant, thread safe
|
@@ -1347,10 +1350,10 @@ __CUDA_FP16_DECL__ __half2 __highs2half2(const __half2 a, const __half2 b);
|
|
1347
1350
|
* \brief Returns high 16 bits of \p half2 input.
|
1348
1351
|
*
|
1349
1352
|
* \details Returns high 16 bits of \p half2 input \p a.
|
1350
|
-
* \param[in] a - half2. Is only being read.
|
1353
|
+
* \param[in] a - half2. Is only being read.
|
1351
1354
|
*
|
1352
1355
|
* \returns half
|
1353
|
-
*
|
1356
|
+
* - The high 16 bits of the input.
|
1354
1357
|
* \internal
|
1355
1358
|
* \exception-guarantee no-throw guarantee
|
1356
1359
|
* \behavior reentrant, thread safe
|
@@ -1362,10 +1365,10 @@ __CUDA_FP16_DECL__ __half __high2half(const __half2 a);
|
|
1362
1365
|
* \brief Returns low 16 bits of \p half2 input.
|
1363
1366
|
*
|
1364
1367
|
* \details Returns low 16 bits of \p half2 input \p a.
|
1365
|
-
* \param[in] a - half2. Is only being read.
|
1368
|
+
* \param[in] a - half2. Is only being read.
|
1366
1369
|
*
|
1367
1370
|
* \returns half
|
1368
|
-
*
|
1371
|
+
* - Returns \p half which contains low 16 bits of the input \p a.
|
1369
1372
|
* \internal
|
1370
1373
|
* \exception-guarantee no-throw guarantee
|
1371
1374
|
* \behavior reentrant, thread safe
|
@@ -1375,14 +1378,14 @@ __CUDA_FP16_DECL__ __half __low2half(const __half2 a);
|
|
1375
1378
|
/**
|
1376
1379
|
* \ingroup CUDA_MATH__HALF_COMPARISON
|
1377
1380
|
* \brief Checks if the input \p half number is infinite.
|
1378
|
-
*
|
1379
|
-
* \details Checks if the input \p half number \p a is infinite.
|
1380
|
-
* \param[in] a - half. Is only being read.
|
1381
|
-
*
|
1382
|
-
* \returns int
|
1383
|
-
*
|
1384
|
-
*
|
1385
|
-
*
|
1381
|
+
*
|
1382
|
+
* \details Checks if the input \p half number \p a is infinite.
|
1383
|
+
* \param[in] a - half. Is only being read.
|
1384
|
+
*
|
1385
|
+
* \returns int
|
1386
|
+
* - -1 iff \p a is equal to negative infinity,
|
1387
|
+
* - 1 iff \p a is equal to positive infinity,
|
1388
|
+
* - 0 otherwise.
|
1386
1389
|
* \internal
|
1387
1390
|
* \exception-guarantee no-throw guarantee
|
1388
1391
|
* \behavior reentrant, thread safe
|
@@ -1392,15 +1395,15 @@ __CUDA_FP16_DECL__ int __hisinf(const __half a);
|
|
1392
1395
|
/**
|
1393
1396
|
* \ingroup CUDA_MATH__HALF_MISC
|
1394
1397
|
* \brief Combines two \p half numbers into one \p half2 number.
|
1395
|
-
*
|
1398
|
+
*
|
1396
1399
|
* \details Combines two input \p half number \p a and \p b into one \p half2 number.
|
1397
1400
|
* Input \p a is stored in low 16 bits of the return value, input \p b is stored
|
1398
1401
|
* in high 16 bits of the return value.
|
1399
|
-
* \param[in] a - half. Is only being read.
|
1400
|
-
* \param[in] b - half. Is only being read.
|
1401
|
-
*
|
1402
|
+
* \param[in] a - half. Is only being read.
|
1403
|
+
* \param[in] b - half. Is only being read.
|
1404
|
+
*
|
1402
1405
|
* \returns half2
|
1403
|
-
*
|
1406
|
+
* - The half2 with one half equal to \p a and the other to \p b.
|
1404
1407
|
* \internal
|
1405
1408
|
* \exception-guarantee no-throw guarantee
|
1406
1409
|
* \behavior reentrant, thread safe
|
@@ -1410,13 +1413,13 @@ __CUDA_FP16_DECL__ __half2 __halves2half2(const __half a, const __half b);
|
|
1410
1413
|
/**
|
1411
1414
|
* \ingroup CUDA_MATH__HALF_MISC
|
1412
1415
|
* \brief Extracts low 16 bits from \p half2 input.
|
1413
|
-
*
|
1416
|
+
*
|
1414
1417
|
* \details Extracts low 16 bits from \p half2 input \p a and returns a new \p half2
|
1415
1418
|
* number which has both halves equal to the extracted bits.
|
1416
|
-
* \param[in] a - half2. Is only being read.
|
1417
|
-
*
|
1419
|
+
* \param[in] a - half2. Is only being read.
|
1420
|
+
*
|
1418
1421
|
* \returns half2
|
1419
|
-
*
|
1422
|
+
* - The half2 with both halves equal to the low 16 bits of the input.
|
1420
1423
|
* \internal
|
1421
1424
|
* \exception-guarantee no-throw guarantee
|
1422
1425
|
* \behavior reentrant, thread safe
|
@@ -1426,13 +1429,13 @@ __CUDA_FP16_DECL__ __half2 __low2half2(const __half2 a);
|
|
1426
1429
|
/**
|
1427
1430
|
* \ingroup CUDA_MATH__HALF_MISC
|
1428
1431
|
* \brief Extracts high 16 bits from \p half2 input.
|
1429
|
-
*
|
1432
|
+
*
|
1430
1433
|
* \details Extracts high 16 bits from \p half2 input \p a and returns a new \p half2
|
1431
1434
|
* number which has both halves equal to the extracted bits.
|
1432
|
-
* \param[in] a - half2. Is only being read.
|
1433
|
-
*
|
1435
|
+
* \param[in] a - half2. Is only being read.
|
1436
|
+
*
|
1434
1437
|
* \returns half2
|
1435
|
-
*
|
1438
|
+
* - The half2 with both halves equal to the high 16 bits of the input.
|
1436
1439
|
* \internal
|
1437
1440
|
* \exception-guarantee no-throw guarantee
|
1438
1441
|
* \behavior reentrant, thread safe
|
@@ -1443,13 +1446,13 @@ __CUDA_FP16_DECL__ __half2 __high2half2(const __half2 a);
|
|
1443
1446
|
/**
|
1444
1447
|
* \ingroup CUDA_MATH__HALF_MISC
|
1445
1448
|
* \brief Reinterprets bits in a \p half as a signed short integer.
|
1446
|
-
*
|
1449
|
+
*
|
1447
1450
|
* \details Reinterprets the bits in the half-precision floating-point number \p h
|
1448
|
-
* as a signed short integer.
|
1449
|
-
* \param[in] h - half. Is only being read.
|
1450
|
-
*
|
1451
|
+
* as a signed short integer.
|
1452
|
+
* \param[in] h - half. Is only being read.
|
1453
|
+
*
|
1451
1454
|
* \returns short int
|
1452
|
-
*
|
1455
|
+
* - The reinterpreted value.
|
1453
1456
|
* \internal
|
1454
1457
|
* \exception-guarantee no-throw guarantee
|
1455
1458
|
* \behavior reentrant, thread safe
|
@@ -1459,13 +1462,13 @@ __CUDA_FP16_DECL__ short int __half_as_short(const __half h);
|
|
1459
1462
|
/**
|
1460
1463
|
* \ingroup CUDA_MATH__HALF_MISC
|
1461
1464
|
* \brief Reinterprets bits in a \p half as an unsigned short integer.
|
1462
|
-
*
|
1465
|
+
*
|
1463
1466
|
* \details Reinterprets the bits in the half-precision floating-point \p h
|
1464
1467
|
* as an unsigned short number.
|
1465
|
-
* \param[in] h - half. Is only being read.
|
1466
|
-
*
|
1468
|
+
* \param[in] h - half. Is only being read.
|
1469
|
+
*
|
1467
1470
|
* \returns unsigned short int
|
1468
|
-
*
|
1471
|
+
* - The reinterpreted value.
|
1469
1472
|
* \internal
|
1470
1473
|
* \exception-guarantee no-throw guarantee
|
1471
1474
|
* \behavior reentrant, thread safe
|
@@ -1475,13 +1478,13 @@ __CUDA_FP16_DECL__ unsigned short int __half_as_ushort(const __half h);
|
|
1475
1478
|
/**
|
1476
1479
|
* \ingroup CUDA_MATH__HALF_MISC
|
1477
1480
|
* \brief Reinterprets bits in a signed short integer as a \p half.
|
1478
|
-
*
|
1481
|
+
*
|
1479
1482
|
* \details Reinterprets the bits in the signed short integer \p i as a
|
1480
1483
|
* half-precision floating-point number.
|
1481
|
-
* \param[in] i - short int. Is only being read.
|
1482
|
-
*
|
1484
|
+
* \param[in] i - short int. Is only being read.
|
1485
|
+
*
|
1483
1486
|
* \returns half
|
1484
|
-
*
|
1487
|
+
* - The reinterpreted value.
|
1485
1488
|
* \internal
|
1486
1489
|
* \exception-guarantee no-throw guarantee
|
1487
1490
|
* \behavior reentrant, thread safe
|
@@ -1491,21 +1494,101 @@ __CUDA_FP16_DECL__ __half __short_as_half(const short int i);
|
|
1491
1494
|
/**
|
1492
1495
|
* \ingroup CUDA_MATH__HALF_MISC
|
1493
1496
|
* \brief Reinterprets bits in an unsigned short integer as a \p half.
|
1494
|
-
*
|
1497
|
+
*
|
1495
1498
|
* \details Reinterprets the bits in the unsigned short integer \p i as a
|
1496
1499
|
* half-precision floating-point number.
|
1497
|
-
* \param[in] i - unsigned short int. Is only being read.
|
1498
|
-
*
|
1500
|
+
* \param[in] i - unsigned short int. Is only being read.
|
1501
|
+
*
|
1499
1502
|
* \returns half
|
1500
|
-
*
|
1503
|
+
* - The reinterpreted value.
|
1501
1504
|
* \internal
|
1502
1505
|
* \exception-guarantee no-throw guarantee
|
1503
1506
|
* \behavior reentrant, thread safe
|
1504
1507
|
* \endinternal
|
1505
1508
|
*/
|
1506
1509
|
__CUDA_FP16_DECL__ __half __ushort_as_half(const unsigned short int i);
|
1510
|
+
/**
|
1511
|
+
* \ingroup CUDA_MATH__HALF_COMPARISON
|
1512
|
+
* \brief Calculates \p half maximum of two input values.
|
1513
|
+
*
|
1514
|
+
* \details Calculates \p half max(\p a, \p b)
|
1515
|
+
* defined as (\p a > \p b) ? \p a : \p b.
|
1516
|
+
* - If either of inputs is NaN, the other input is returned.
|
1517
|
+
* - If both inputs are NaNs, then canonical NaN is returned.
|
1518
|
+
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
1519
|
+
* \param[in] a - half. Is only being read.
|
1520
|
+
* \param[in] b - half. Is only being read.
|
1521
|
+
*
|
1522
|
+
* \returns half
|
1523
|
+
* \internal
|
1524
|
+
* \exception-guarantee no-throw guarantee
|
1525
|
+
* \behavior reentrant, thread safe
|
1526
|
+
* \endinternal
|
1527
|
+
*/
|
1528
|
+
__CUDA_FP16_DECL__ __half __hmax(const __half a, const __half b);
|
1529
|
+
/**
|
1530
|
+
* \ingroup CUDA_MATH__HALF_COMPARISON
|
1531
|
+
* \brief Calculates \p half minimum of two input values.
|
1532
|
+
*
|
1533
|
+
* \details Calculates \p half min(\p a, \p b)
|
1534
|
+
* defined as (\p a < \p b) ? \p a : \p b.
|
1535
|
+
* - If either of inputs is NaN, the other input is returned.
|
1536
|
+
* - If both inputs are NaNs, then canonical NaN is returned.
|
1537
|
+
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
1538
|
+
* \param[in] a - half. Is only being read.
|
1539
|
+
* \param[in] b - half. Is only being read.
|
1540
|
+
*
|
1541
|
+
* \returns half
|
1542
|
+
* \internal
|
1543
|
+
* \exception-guarantee no-throw guarantee
|
1544
|
+
* \behavior reentrant, thread safe
|
1545
|
+
* \endinternal
|
1546
|
+
*/
|
1547
|
+
__CUDA_FP16_DECL__ __half __hmin(const __half a, const __half b);
|
1548
|
+
/**
|
1549
|
+
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
1550
|
+
* \brief Calculates \p half2 vector maximum of two inputs.
|
1551
|
+
*
|
1552
|
+
* \details Calculates \p half2 vector max(\p a, \p b).
|
1553
|
+
* Elementwise \p half operation is defined as
|
1554
|
+
* (\p a > \p b) ? \p a : \p b.
|
1555
|
+
* - If either of inputs is NaN, the other input is returned.
|
1556
|
+
* - If both inputs are NaNs, then canonical NaN is returned.
|
1557
|
+
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
1558
|
+
* \param[in] a - half2. Is only being read.
|
1559
|
+
* \param[in] b - half2. Is only being read.
|
1560
|
+
*
|
1561
|
+
* \returns half2
|
1562
|
+
* - The result of elementwise maximum of vectors \p a and \p b
|
1563
|
+
* \internal
|
1564
|
+
* \exception-guarantee no-throw guarantee
|
1565
|
+
* \behavior reentrant, thread safe
|
1566
|
+
* \endinternal
|
1567
|
+
*/
|
1568
|
+
__CUDA_FP16_DECL__ __half2 __hmax2(const __half2 a, const __half2 b);
|
1569
|
+
/**
|
1570
|
+
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
1571
|
+
* \brief Calculates \p half2 vector minimum of two inputs.
|
1572
|
+
*
|
1573
|
+
* \details Calculates \p half2 vector min(\p a, \p b).
|
1574
|
+
* Elementwise \p half operation is defined as
|
1575
|
+
* (\p a < \p b) ? \p a : \p b.
|
1576
|
+
* - If either of inputs is NaN, the other input is returned.
|
1577
|
+
* - If both inputs are NaNs, then canonical NaN is returned.
|
1578
|
+
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
1579
|
+
* \param[in] a - half2. Is only being read.
|
1580
|
+
* \param[in] b - half2. Is only being read.
|
1581
|
+
*
|
1582
|
+
* \returns half2
|
1583
|
+
* - The result of elementwise minimum of vectors \p a and \p b
|
1584
|
+
* \internal
|
1585
|
+
* \exception-guarantee no-throw guarantee
|
1586
|
+
* \behavior reentrant, thread safe
|
1587
|
+
* \endinternal
|
1588
|
+
*/
|
1589
|
+
__CUDA_FP16_DECL__ __half2 __hmin2(const __half2 a, const __half2 b);
|
1507
1590
|
|
1508
|
-
#if __CUDA_ARCH__
|
1591
|
+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 300)
|
1509
1592
|
#if !defined warpSize && !defined __local_warpSize
|
1510
1593
|
#define warpSize 32
|
1511
1594
|
#define __local_warpSize
|
@@ -1520,7 +1603,7 @@ __CUDA_FP16_DECL__ __half __ushort_as_half(const unsigned short int i);
|
|
1520
1603
|
#endif
|
1521
1604
|
|
1522
1605
|
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 700
|
1523
|
-
#define __WSB_DEPRECATION_MESSAGE(x)
|
1606
|
+
#define __WSB_DEPRECATION_MESSAGE(x) __CUDA_FP16_STRINGIFY(x) "() is deprecated in favor of " __CUDA_FP16_STRINGIFY(x) "_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."
|
1524
1607
|
|
1525
1608
|
__CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl)) __half2 __shfl(const __half2 var, const int delta, const int width = warpSize);
|
1526
1609
|
__CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_up)) __half2 __shfl_up(const __half2 var, const unsigned int delta, const int width = warpSize);
|
@@ -1534,22 +1617,22 @@ __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_xor)) __half
|
|
1534
1617
|
|
1535
1618
|
/**
|
1536
1619
|
* \ingroup CUDA_MATH__HALF_MISC
|
1537
|
-
* \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
|
1538
|
-
*
|
1539
|
-
* \details Returns the value of var held by the thread whose ID is given by delta.
|
1540
|
-
* If width is less than warpSize then each subsection of the warp behaves as a separate
|
1541
|
-
* entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
|
1542
|
-
* the value returned corresponds to the value of var held by the delta modulo width (i.e.
|
1543
|
-
* within the same subsection). width must have a value which is a power of 2;
|
1544
|
-
* results are undefined if width is not a power of 2, or is a number greater than
|
1545
|
-
* warpSize.
|
1546
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1547
|
-
* \param[in] var - half2. Is only being read.
|
1548
|
-
* \param[in] delta - int. Is only being read.
|
1549
|
-
* \param[in] width - int. Is only being read.
|
1550
|
-
*
|
1551
|
-
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1552
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1620
|
+
* \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
|
1621
|
+
*
|
1622
|
+
* \details Returns the value of var held by the thread whose ID is given by delta.
|
1623
|
+
* If width is less than warpSize then each subsection of the warp behaves as a separate
|
1624
|
+
* entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
|
1625
|
+
* the value returned corresponds to the value of var held by the delta modulo width (i.e.
|
1626
|
+
* within the same subsection). width must have a value which is a power of 2;
|
1627
|
+
* results are undefined if width is not a power of 2, or is a number greater than
|
1628
|
+
* warpSize.
|
1629
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1630
|
+
* \param[in] var - half2. Is only being read.
|
1631
|
+
* \param[in] delta - int. Is only being read.
|
1632
|
+
* \param[in] width - int. Is only being read.
|
1633
|
+
*
|
1634
|
+
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1635
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1553
1636
|
* \internal
|
1554
1637
|
* \exception-guarantee no-throw guarantee
|
1555
1638
|
* \behavior not reentrant, not thread safe
|
@@ -1558,22 +1641,23 @@ __CUDA_FP16_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__shfl_xor)) __half
|
|
1558
1641
|
__CUDA_FP16_DECL__ __half2 __shfl_sync(const unsigned mask, const __half2 var, const int delta, const int width = warpSize);
|
1559
1642
|
/**
|
1560
1643
|
* \ingroup CUDA_MATH__HALF_MISC
|
1561
|
-
* \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
|
1562
|
-
*
|
1563
|
-
* \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
|
1564
|
-
* The value of var held by the resulting lane ID is returned: in effect, var is shifted up
|
1565
|
-
* the warp by delta threads. If width is less than warpSize then each subsection of the warp
|
1566
|
-
* behaves as a separate entity with a starting logical thread ID of 0. The source thread index
|
1567
|
-
* will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
|
1568
|
-
* width must have a value which is a power of 2; results are undefined if width is not a power of 2,
|
1569
|
-
* or is a number greater than warpSize.
|
1570
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1571
|
-
* \param[in] var - half2. Is only being read.
|
1572
|
-
* \param[in] delta - int. Is only being read.
|
1573
|
-
* \param[in] width - int. Is only being read.
|
1574
|
-
*
|
1575
|
-
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1576
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1644
|
+
* \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
|
1645
|
+
*
|
1646
|
+
* \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
|
1647
|
+
* The value of var held by the resulting lane ID is returned: in effect, var is shifted up
|
1648
|
+
* the warp by delta threads. If width is less than warpSize then each subsection of the warp
|
1649
|
+
* behaves as a separate entity with a starting logical thread ID of 0. The source thread index
|
1650
|
+
* will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
|
1651
|
+
* width must have a value which is a power of 2; results are undefined if width is not a power of 2,
|
1652
|
+
* or is a number greater than warpSize.
|
1653
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1654
|
+
* \param[in] var - half2. Is only being read.
|
1655
|
+
* \param[in] delta - unsigned int. Is only being read.
|
1656
|
+
* \param[in] width - int. Is only being read.
|
1657
|
+
*
|
1658
|
+
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1659
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1660
|
+
* \note_ref_guide_warp_shuffle
|
1577
1661
|
* \internal
|
1578
1662
|
* \exception-guarantee no-throw guarantee
|
1579
1663
|
* \behavior not reentrant, not thread safe
|
@@ -1582,22 +1666,23 @@ __CUDA_FP16_DECL__ __half2 __shfl_sync(const unsigned mask, const __half2 var, c
|
|
1582
1666
|
__CUDA_FP16_DECL__ __half2 __shfl_up_sync(const unsigned mask, const __half2 var, const unsigned int delta, const int width = warpSize);
|
1583
1667
|
/**
|
1584
1668
|
* \ingroup CUDA_MATH__HALF_MISC
|
1585
|
-
* \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
|
1586
|
-
*
|
1587
|
-
* \details Calculates a source thread ID by adding delta to the caller's thread ID.
|
1588
|
-
* The value of var held by the resulting thread ID is returned: this has the effect
|
1589
|
-
* of shifting var down the warp by delta threads. If width is less than warpSize then
|
1590
|
-
* each subsection of the warp behaves as a separate entity with a starting logical
|
1591
|
-
* thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
|
1592
|
-
* will not wrap around the value of width and so the upper delta threads
|
1593
|
-
* will remain unchanged.
|
1594
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1595
|
-
* \param[in] var - half2. Is only being read.
|
1596
|
-
* \param[in] delta - int. Is only being read.
|
1597
|
-
* \param[in] width - int. Is only being read.
|
1598
|
-
*
|
1599
|
-
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1600
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1669
|
+
* \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
|
1670
|
+
*
|
1671
|
+
* \details Calculates a source thread ID by adding delta to the caller's thread ID.
|
1672
|
+
* The value of var held by the resulting thread ID is returned: this has the effect
|
1673
|
+
* of shifting var down the warp by delta threads. If width is less than warpSize then
|
1674
|
+
* each subsection of the warp behaves as a separate entity with a starting logical
|
1675
|
+
* thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
|
1676
|
+
* will not wrap around the value of width and so the upper delta threads
|
1677
|
+
* will remain unchanged.
|
1678
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1679
|
+
* \param[in] var - half2. Is only being read.
|
1680
|
+
* \param[in] delta - unsigned int. Is only being read.
|
1681
|
+
* \param[in] width - int. Is only being read.
|
1682
|
+
*
|
1683
|
+
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1684
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1685
|
+
* \note_ref_guide_warp_shuffle
|
1601
1686
|
* \internal
|
1602
1687
|
* \exception-guarantee no-throw guarantee
|
1603
1688
|
* \behavior not reentrant, not thread safe
|
@@ -1606,21 +1691,22 @@ __CUDA_FP16_DECL__ __half2 __shfl_up_sync(const unsigned mask, const __half2 var
|
|
1606
1691
|
__CUDA_FP16_DECL__ __half2 __shfl_down_sync(const unsigned mask, const __half2 var, const unsigned int delta, const int width = warpSize);
|
1607
1692
|
/**
|
1608
1693
|
* \ingroup CUDA_MATH__HALF_MISC
|
1609
|
-
* \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
|
1610
|
-
*
|
1611
|
-
* \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with mask:
|
1612
|
-
* the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
|
1613
|
-
* group of width consecutive threads are able to access elements from earlier groups of threads,
|
1614
|
-
* however if they attempt to access elements from later groups of threads their own value of var
|
1615
|
-
* will be returned. This mode implements a butterfly addressing pattern such as is used in tree
|
1616
|
-
* reduction and broadcast.
|
1617
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1618
|
-
* \param[in] var - half2. Is only being read.
|
1619
|
-
* \param[in] delta - int. Is only being read.
|
1620
|
-
* \param[in] width - int. Is only being read.
|
1621
|
-
*
|
1622
|
-
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1623
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1694
|
+
* \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
|
1695
|
+
*
|
1696
|
+
* \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with delta:
|
1697
|
+
* the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
|
1698
|
+
* group of width consecutive threads are able to access elements from earlier groups of threads,
|
1699
|
+
* however if they attempt to access elements from later groups of threads their own value of var
|
1700
|
+
* will be returned. This mode implements a butterfly addressing pattern such as is used in tree
|
1701
|
+
* reduction and broadcast.
|
1702
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1703
|
+
* \param[in] var - half2. Is only being read.
|
1704
|
+
* \param[in] delta - int. Is only being read.
|
1705
|
+
* \param[in] width - int. Is only being read.
|
1706
|
+
*
|
1707
|
+
* \returns Returns the 4-byte word referenced by var from the source thread ID as half2.
|
1708
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1709
|
+
* \note_ref_guide_warp_shuffle
|
1624
1710
|
* \internal
|
1625
1711
|
* \exception-guarantee no-throw guarantee
|
1626
1712
|
* \behavior not reentrant, not thread safe
|
@@ -1629,22 +1715,23 @@ __CUDA_FP16_DECL__ __half2 __shfl_down_sync(const unsigned mask, const __half2 v
|
|
1629
1715
|
__CUDA_FP16_DECL__ __half2 __shfl_xor_sync(const unsigned mask, const __half2 var, const int delta, const int width = warpSize);
|
1630
1716
|
/**
|
1631
1717
|
* \ingroup CUDA_MATH__HALF_MISC
|
1632
|
-
* \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
|
1633
|
-
*
|
1634
|
-
* \details Returns the value of var held by the thread whose ID is given by delta.
|
1635
|
-
* If width is less than warpSize then each subsection of the warp behaves as a separate
|
1636
|
-
* entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
|
1637
|
-
* the value returned corresponds to the value of var held by the delta modulo width (i.e.
|
1638
|
-
* within the same subsection). width must have a value which is a power of 2;
|
1639
|
-
* results are undefined if width is not a power of 2, or is a number greater than
|
1640
|
-
* warpSize.
|
1641
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1642
|
-
* \param[in] var - half. Is only being read.
|
1643
|
-
* \param[in] delta - int. Is only being read.
|
1644
|
-
* \param[in] width - int. Is only being read.
|
1645
|
-
*
|
1646
|
-
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1647
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1718
|
+
* \brief Exchange a variable between threads within a warp. Direct copy from indexed thread.
|
1719
|
+
*
|
1720
|
+
* \details Returns the value of var held by the thread whose ID is given by delta.
|
1721
|
+
* If width is less than warpSize then each subsection of the warp behaves as a separate
|
1722
|
+
* entity with a starting logical thread ID of 0. If delta is outside the range [0:width-1],
|
1723
|
+
* the value returned corresponds to the value of var held by the delta modulo width (i.e.
|
1724
|
+
* within the same subsection). width must have a value which is a power of 2;
|
1725
|
+
* results are undefined if width is not a power of 2, or is a number greater than
|
1726
|
+
* warpSize.
|
1727
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1728
|
+
* \param[in] var - half. Is only being read.
|
1729
|
+
* \param[in] delta - int. Is only being read.
|
1730
|
+
* \param[in] width - int. Is only being read.
|
1731
|
+
*
|
1732
|
+
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1733
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1734
|
+
* \note_ref_guide_warp_shuffle
|
1648
1735
|
* \internal
|
1649
1736
|
* \exception-guarantee no-throw guarantee
|
1650
1737
|
* \behavior not reentrant, not thread safe
|
@@ -1653,21 +1740,22 @@ __CUDA_FP16_DECL__ __half2 __shfl_xor_sync(const unsigned mask, const __half2 va
|
|
1653
1740
|
__CUDA_FP16_DECL__ __half __shfl_sync(const unsigned mask, const __half var, const int delta, const int width = warpSize);
|
1654
1741
|
/**
|
1655
1742
|
* \ingroup CUDA_MATH__HALF_MISC
|
1656
|
-
* \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
|
1657
|
-
* \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
|
1658
|
-
* The value of var held by the resulting lane ID is returned: in effect, var is shifted up
|
1659
|
-
* the warp by delta threads. If width is less than warpSize then each subsection of the warp
|
1660
|
-
* behaves as a separate entity with a starting logical thread ID of 0. The source thread index
|
1661
|
-
* will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
|
1662
|
-
* width must have a value which is a power of 2; results are undefined if width is not a power of 2,
|
1663
|
-
* or is a number greater than warpSize.
|
1664
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1665
|
-
* \param[in] var - half. Is only being read.
|
1666
|
-
* \param[in] delta - int. Is only being read.
|
1667
|
-
* \param[in] width - int. Is only being read.
|
1668
|
-
*
|
1669
|
-
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1670
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1743
|
+
* \brief Exchange a variable between threads within a warp. Copy from a thread with lower ID relative to the caller.
|
1744
|
+
* \details Calculates a source thread ID by subtracting delta from the caller's lane ID.
|
1745
|
+
* The value of var held by the resulting lane ID is returned: in effect, var is shifted up
|
1746
|
+
* the warp by delta threads. If width is less than warpSize then each subsection of the warp
|
1747
|
+
* behaves as a separate entity with a starting logical thread ID of 0. The source thread index
|
1748
|
+
* will not wrap around the value of width, so effectively the lower delta threads will be unchanged.
|
1749
|
+
* width must have a value which is a power of 2; results are undefined if width is not a power of 2,
|
1750
|
+
* or is a number greater than warpSize.
|
1751
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1752
|
+
* \param[in] var - half. Is only being read.
|
1753
|
+
* \param[in] delta - unsigned int. Is only being read.
|
1754
|
+
* \param[in] width - int. Is only being read.
|
1755
|
+
*
|
1756
|
+
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1757
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1758
|
+
* \note_ref_guide_warp_shuffle
|
1671
1759
|
* \internal
|
1672
1760
|
* \exception-guarantee no-throw guarantee
|
1673
1761
|
* \behavior not reentrant, not thread safe
|
@@ -1676,22 +1764,23 @@ __CUDA_FP16_DECL__ __half __shfl_sync(const unsigned mask, const __half var, con
|
|
1676
1764
|
__CUDA_FP16_DECL__ __half __shfl_up_sync(const unsigned mask, const __half var, const unsigned int delta, const int width = warpSize);
|
1677
1765
|
/**
|
1678
1766
|
* \ingroup CUDA_MATH__HALF_MISC
|
1679
|
-
* \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
|
1680
|
-
*
|
1681
|
-
* \details Calculates a source thread ID by adding delta to the caller's thread ID.
|
1682
|
-
* The value of var held by the resulting thread ID is returned: this has the effect
|
1683
|
-
* of shifting var down the warp by delta threads. If width is less than warpSize then
|
1684
|
-
* each subsection of the warp behaves as a separate entity with a starting logical
|
1685
|
-
* thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
|
1686
|
-
* will not wrap around the value of width and so the upper delta threads
|
1687
|
-
* will remain unchanged.
|
1688
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1689
|
-
* \param[in] var - half. Is only being read.
|
1690
|
-
* \param[in] delta - int. Is only being read.
|
1691
|
-
* \param[in] width - int. Is only being read.
|
1692
|
-
*
|
1693
|
-
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1694
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1767
|
+
* \brief Exchange a variable between threads within a warp. Copy from a thread with higher ID relative to the caller.
|
1768
|
+
*
|
1769
|
+
* \details Calculates a source thread ID by adding delta to the caller's thread ID.
|
1770
|
+
* The value of var held by the resulting thread ID is returned: this has the effect
|
1771
|
+
* of shifting var down the warp by delta threads. If width is less than warpSize then
|
1772
|
+
* each subsection of the warp behaves as a separate entity with a starting logical
|
1773
|
+
* thread ID of 0. As for __shfl_up_sync(), the ID number of the source thread
|
1774
|
+
* will not wrap around the value of width and so the upper delta threads
|
1775
|
+
* will remain unchanged.
|
1776
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1777
|
+
* \param[in] var - half. Is only being read.
|
1778
|
+
* \param[in] delta - unsigned int. Is only being read.
|
1779
|
+
* \param[in] width - int. Is only being read.
|
1780
|
+
*
|
1781
|
+
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1782
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1783
|
+
* \note_ref_guide_warp_shuffle
|
1695
1784
|
* \internal
|
1696
1785
|
* \exception-guarantee no-throw guarantee
|
1697
1786
|
* \behavior not reentrant, not thread safe
|
@@ -1700,21 +1789,22 @@ __CUDA_FP16_DECL__ __half __shfl_up_sync(const unsigned mask, const __half var,
|
|
1700
1789
|
__CUDA_FP16_DECL__ __half __shfl_down_sync(const unsigned mask, const __half var, const unsigned int delta, const int width = warpSize);
|
1701
1790
|
/**
|
1702
1791
|
* \ingroup CUDA_MATH__HALF_MISC
|
1703
|
-
* \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
|
1704
|
-
*
|
1705
|
-
* \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with mask:
|
1706
|
-
* the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
|
1707
|
-
* group of width consecutive threads are able to access elements from earlier groups of threads,
|
1708
|
-
* however if they attempt to access elements from later groups of threads their own value of var
|
1709
|
-
* will be returned. This mode implements a butterfly addressing pattern such as is used in tree
|
1710
|
-
* reduction and broadcast.
|
1711
|
-
* \param[in] mask - unsigned int. Is only being read.
|
1712
|
-
* \param[in] var - half. Is only being read.
|
1713
|
-
* \param[in] delta - int. Is only being read.
|
1714
|
-
* \param[in] width - int. Is only being read.
|
1715
|
-
*
|
1716
|
-
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1717
|
-
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1792
|
+
* \brief Exchange a variable between threads within a warp. Copy from a thread based on bitwise XOR of own thread ID.
|
1793
|
+
*
|
1794
|
+
* \details Calculates a source thread ID by performing a bitwise XOR of the caller's thread ID with delta:
|
1795
|
+
* the value of var held by the resulting thread ID is returned. If width is less than warpSize then each
|
1796
|
+
* group of width consecutive threads are able to access elements from earlier groups of threads,
|
1797
|
+
* however if they attempt to access elements from later groups of threads their own value of var
|
1798
|
+
* will be returned. This mode implements a butterfly addressing pattern such as is used in tree
|
1799
|
+
* reduction and broadcast.
|
1800
|
+
* \param[in] mask - unsigned int. Is only being read.
|
1801
|
+
* \param[in] var - half. Is only being read.
|
1802
|
+
* \param[in] delta - int. Is only being read.
|
1803
|
+
* \param[in] width - int. Is only being read.
|
1804
|
+
*
|
1805
|
+
* \returns Returns the 2-byte word referenced by var from the source thread ID as half.
|
1806
|
+
* If the source thread ID is out of range or the source thread has exited, the calling thread's own var is returned.
|
1807
|
+
* \note_ref_guide_warp_shuffle
|
1718
1808
|
* \internal
|
1719
1809
|
* \exception-guarantee no-throw guarantee
|
1720
1810
|
* \behavior not reentrant, not thread safe
|
@@ -1726,9 +1816,9 @@ __CUDA_FP16_DECL__ __half __shfl_xor_sync(const unsigned mask, const __half var,
|
|
1726
1816
|
#undef warpSize
|
1727
1817
|
#undef __local_warpSize
|
1728
1818
|
#endif
|
1729
|
-
#endif
|
1819
|
+
#endif /*!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 300) */
|
1730
1820
|
|
1731
|
-
#if defined(__cplusplus) && ( __CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__) )
|
1821
|
+
#if defined(__cplusplus) && ( !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 320) )
|
1732
1822
|
/**
|
1733
1823
|
* \ingroup CUDA_MATH__HALF_MISC
|
1734
1824
|
* \brief Generates a `ld.global.nc` load instruction.
|
@@ -1869,21 +1959,21 @@ __CUDA_FP16_DECL__ void __stwt(__half2 *const ptr, const __half2 value);
|
|
1869
1959
|
* \param[in] value - the value to be stored
|
1870
1960
|
*/
|
1871
1961
|
__CUDA_FP16_DECL__ void __stwt(__half *const ptr, const __half value);
|
1872
|
-
#endif /*defined(__cplusplus) && ( __CUDA_ARCH__ >= 320 || !defined(__CUDA_ARCH__) )*/
|
1962
|
+
#endif /*defined(__cplusplus) && ( !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 320) )*/
|
1873
1963
|
|
1874
|
-
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
|
1964
|
+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
1875
1965
|
/**
|
1876
1966
|
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
1877
1967
|
* \brief Performs half2 vector if-equal comparison.
|
1878
|
-
*
|
1968
|
+
*
|
1879
1969
|
* \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
|
1880
1970
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1881
1971
|
* NaN inputs generate false results.
|
1882
|
-
* \param[in] a - half2. Is only being read.
|
1883
|
-
* \param[in] b - half2. Is only being read.
|
1884
|
-
*
|
1972
|
+
* \param[in] a - half2. Is only being read.
|
1973
|
+
* \param[in] b - half2. Is only being read.
|
1974
|
+
*
|
1885
1975
|
* \returns half2
|
1886
|
-
*
|
1976
|
+
* - The vector result of if-equal comparison of vectors \p a and \p b.
|
1887
1977
|
* \internal
|
1888
1978
|
* \exception-guarantee no-throw guarantee
|
1889
1979
|
* \behavior reentrant, thread safe
|
@@ -1893,15 +1983,15 @@ __CUDA_FP16_DECL__ __half2 __heq2(const __half2 a, const __half2 b);
|
|
1893
1983
|
/**
|
1894
1984
|
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
1895
1985
|
* \brief Performs \p half2 vector not-equal comparison.
|
1896
|
-
*
|
1986
|
+
*
|
1897
1987
|
* \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
|
1898
1988
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1899
1989
|
* NaN inputs generate false results.
|
1900
|
-
* \param[in] a - half2. Is only being read.
|
1901
|
-
* \param[in] b - half2. Is only being read.
|
1902
|
-
*
|
1990
|
+
* \param[in] a - half2. Is only being read.
|
1991
|
+
* \param[in] b - half2. Is only being read.
|
1992
|
+
*
|
1903
1993
|
* \returns half2
|
1904
|
-
*
|
1994
|
+
* - The vector result of not-equal comparison of vectors \p a and \p b.
|
1905
1995
|
* \internal
|
1906
1996
|
* \exception-guarantee no-throw guarantee
|
1907
1997
|
* \behavior reentrant, thread safe
|
@@ -1915,11 +2005,11 @@ __CUDA_FP16_DECL__ __half2 __hne2(const __half2 a, const __half2 b);
|
|
1915
2005
|
* \details Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
|
1916
2006
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1917
2007
|
* NaN inputs generate false results.
|
1918
|
-
* \param[in] a - half2. Is only being read.
|
1919
|
-
* \param[in] b - half2. Is only being read.
|
2008
|
+
* \param[in] a - half2. Is only being read.
|
2009
|
+
* \param[in] b - half2. Is only being read.
|
1920
2010
|
*
|
1921
2011
|
* \returns half2
|
1922
|
-
*
|
2012
|
+
* - The \p half2 result of less-equal comparison of vectors \p a and \p b.
|
1923
2013
|
* \internal
|
1924
2014
|
* \exception-guarantee no-throw guarantee
|
1925
2015
|
* \behavior reentrant, thread safe
|
@@ -1933,11 +2023,11 @@ __CUDA_FP16_DECL__ __half2 __hle2(const __half2 a, const __half2 b);
|
|
1933
2023
|
* \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
|
1934
2024
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1935
2025
|
* NaN inputs generate false results.
|
1936
|
-
* \param[in] a - half2. Is only being read.
|
1937
|
-
* \param[in] b - half2. Is only being read.
|
2026
|
+
* \param[in] a - half2. Is only being read.
|
2027
|
+
* \param[in] b - half2. Is only being read.
|
1938
2028
|
*
|
1939
2029
|
* \returns half2
|
1940
|
-
*
|
2030
|
+
* - The vector result of greater-equal comparison of vectors \p a and \p b.
|
1941
2031
|
* \internal
|
1942
2032
|
* \exception-guarantee no-throw guarantee
|
1943
2033
|
* \behavior reentrant, thread safe
|
@@ -1951,11 +2041,11 @@ __CUDA_FP16_DECL__ __half2 __hge2(const __half2 a, const __half2 b);
|
|
1951
2041
|
* \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
|
1952
2042
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1953
2043
|
* NaN inputs generate false results.
|
1954
|
-
* \param[in] a - half2. Is only being read.
|
1955
|
-
* \param[in] b - half2. Is only being read.
|
2044
|
+
* \param[in] a - half2. Is only being read.
|
2045
|
+
* \param[in] b - half2. Is only being read.
|
1956
2046
|
*
|
1957
2047
|
* \returns half2
|
1958
|
-
*
|
2048
|
+
* - The half2 vector result of less-than comparison of vectors \p a and \p b.
|
1959
2049
|
* \internal
|
1960
2050
|
* \exception-guarantee no-throw guarantee
|
1961
2051
|
* \behavior reentrant, thread safe
|
@@ -1965,15 +2055,15 @@ __CUDA_FP16_DECL__ __half2 __hlt2(const __half2 a, const __half2 b);
|
|
1965
2055
|
/**
|
1966
2056
|
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
1967
2057
|
* \brief Performs \p half2 vector greater-than comparison.
|
1968
|
-
*
|
2058
|
+
*
|
1969
2059
|
* \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
|
1970
2060
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1971
2061
|
* NaN inputs generate false results.
|
1972
|
-
* \param[in] a - half2. Is only being read.
|
1973
|
-
* \param[in] b - half2. Is only being read.
|
1974
|
-
*
|
2062
|
+
* \param[in] a - half2. Is only being read.
|
2063
|
+
* \param[in] b - half2. Is only being read.
|
2064
|
+
*
|
1975
2065
|
* \returns half2
|
1976
|
-
*
|
2066
|
+
* - The vector result of greater-than comparison of vectors \p a and \p b.
|
1977
2067
|
* \internal
|
1978
2068
|
* \exception-guarantee no-throw guarantee
|
1979
2069
|
* \behavior reentrant, thread safe
|
@@ -1983,15 +2073,15 @@ __CUDA_FP16_DECL__ __half2 __hgt2(const __half2 a, const __half2 b);
|
|
1983
2073
|
/**
|
1984
2074
|
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
1985
2075
|
* \brief Performs \p half2 vector unordered if-equal comparison.
|
1986
|
-
*
|
2076
|
+
*
|
1987
2077
|
* \details Performs \p half2 vector if-equal comparison of inputs \p a and \p b.
|
1988
2078
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
1989
2079
|
* NaN inputs generate true results.
|
1990
|
-
* \param[in] a - half2. Is only being read.
|
1991
|
-
* \param[in] b - half2. Is only being read.
|
1992
|
-
*
|
2080
|
+
* \param[in] a - half2. Is only being read.
|
2081
|
+
* \param[in] b - half2. Is only being read.
|
2082
|
+
*
|
1993
2083
|
* \returns half2
|
1994
|
-
*
|
2084
|
+
* - The vector result of unordered if-equal comparison of vectors \p a and \p b.
|
1995
2085
|
* \internal
|
1996
2086
|
* \exception-guarantee no-throw guarantee
|
1997
2087
|
* \behavior reentrant, thread safe
|
@@ -2005,11 +2095,11 @@ __CUDA_FP16_DECL__ __half2 __hequ2(const __half2 a, const __half2 b);
|
|
2005
2095
|
* \details Performs \p half2 vector not-equal comparison of inputs \p a and \p b.
|
2006
2096
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
2007
2097
|
* NaN inputs generate true results.
|
2008
|
-
* \param[in] a - half2. Is only being read.
|
2009
|
-
* \param[in] b - half2. Is only being read.
|
2098
|
+
* \param[in] a - half2. Is only being read.
|
2099
|
+
* \param[in] b - half2. Is only being read.
|
2010
2100
|
*
|
2011
2101
|
* \returns half2
|
2012
|
-
*
|
2102
|
+
* - The vector result of unordered not-equal comparison of vectors \p a and \p b.
|
2013
2103
|
* \internal
|
2014
2104
|
* \exception-guarantee no-throw guarantee
|
2015
2105
|
* \behavior reentrant, thread safe
|
@@ -2023,11 +2113,11 @@ __CUDA_FP16_DECL__ __half2 __hneu2(const __half2 a, const __half2 b);
|
|
2023
2113
|
* Performs \p half2 vector less-equal comparison of inputs \p a and \p b.
|
2024
2114
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
2025
2115
|
* NaN inputs generate true results.
|
2026
|
-
* \param[in] a - half2. Is only being read.
|
2027
|
-
* \param[in] b - half2. Is only being read.
|
2116
|
+
* \param[in] a - half2. Is only being read.
|
2117
|
+
* \param[in] b - half2. Is only being read.
|
2028
2118
|
*
|
2029
2119
|
* \returns half2
|
2030
|
-
*
|
2120
|
+
* - The vector result of unordered less-equal comparison of vectors \p a and \p b.
|
2031
2121
|
* \internal
|
2032
2122
|
* \exception-guarantee no-throw guarantee
|
2033
2123
|
* \behavior reentrant, thread safe
|
@@ -2041,11 +2131,11 @@ __CUDA_FP16_DECL__ __half2 __hleu2(const __half2 a, const __half2 b);
|
|
2041
2131
|
* \details Performs \p half2 vector greater-equal comparison of inputs \p a and \p b.
|
2042
2132
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
2043
2133
|
* NaN inputs generate true results.
|
2044
|
-
* \param[in] a - half2. Is only being read.
|
2045
|
-
* \param[in] b - half2. Is only being read.
|
2134
|
+
* \param[in] a - half2. Is only being read.
|
2135
|
+
* \param[in] b - half2. Is only being read.
|
2046
2136
|
*
|
2047
2137
|
* \returns half2
|
2048
|
-
*
|
2138
|
+
* - The \p half2 vector result of unordered greater-equal comparison of vectors \p a and \p b.
|
2049
2139
|
* \internal
|
2050
2140
|
* \exception-guarantee no-throw guarantee
|
2051
2141
|
* \behavior reentrant, thread safe
|
@@ -2059,11 +2149,11 @@ __CUDA_FP16_DECL__ __half2 __hgeu2(const __half2 a, const __half2 b);
|
|
2059
2149
|
* \details Performs \p half2 vector less-than comparison of inputs \p a and \p b.
|
2060
2150
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
2061
2151
|
* NaN inputs generate true results.
|
2062
|
-
* \param[in] a - half2. Is only being read.
|
2063
|
-
* \param[in] b - half2. Is only being read.
|
2152
|
+
* \param[in] a - half2. Is only being read.
|
2153
|
+
* \param[in] b - half2. Is only being read.
|
2064
2154
|
*
|
2065
2155
|
* \returns half2
|
2066
|
-
*
|
2156
|
+
* - The vector result of unordered less-than comparison of vectors \p a and \p b.
|
2067
2157
|
* \internal
|
2068
2158
|
* \exception-guarantee no-throw guarantee
|
2069
2159
|
* \behavior reentrant, thread safe
|
@@ -2077,11 +2167,11 @@ __CUDA_FP16_DECL__ __half2 __hltu2(const __half2 a, const __half2 b);
|
|
2077
2167
|
* \details Performs \p half2 vector greater-than comparison of inputs \p a and \p b.
|
2078
2168
|
* The corresponding \p half results are set to 1.0 for true, or 0.0 for false.
|
2079
2169
|
* NaN inputs generate true results.
|
2080
|
-
* \param[in] a - half2. Is only being read.
|
2081
|
-
* \param[in] b - half2. Is only being read.
|
2170
|
+
* \param[in] a - half2. Is only being read.
|
2171
|
+
* \param[in] b - half2. Is only being read.
|
2082
2172
|
*
|
2083
2173
|
* \returns half2
|
2084
|
-
*
|
2174
|
+
* - The \p half2 vector result of unordered greater-than comparison of vectors \p a and \p b.
|
2085
2175
|
* \internal
|
2086
2176
|
* \exception-guarantee no-throw guarantee
|
2087
2177
|
* \behavior reentrant, thread safe
|
@@ -2093,11 +2183,11 @@ __CUDA_FP16_DECL__ __half2 __hgtu2(const __half2 a, const __half2 b);
|
|
2093
2183
|
* \brief Determine whether \p half2 argument is a NaN.
|
2094
2184
|
*
|
2095
2185
|
* \details Determine whether each half of input \p half2 number \p a is a NaN.
|
2096
|
-
* \param[in] a - half2. Is only being read.
|
2186
|
+
* \param[in] a - half2. Is only being read.
|
2097
2187
|
*
|
2098
2188
|
* \returns half2
|
2099
|
-
*
|
2100
|
-
* 1.0 for NaN, 0.0 otherwise.
|
2189
|
+
* - The half2 with the corresponding \p half results set to
|
2190
|
+
* 1.0 for NaN, 0.0 otherwise.
|
2101
2191
|
* \internal
|
2102
2192
|
* \exception-guarantee no-throw guarantee
|
2103
2193
|
* \behavior reentrant, thread safe
|
@@ -2113,11 +2203,11 @@ __CUDA_FP16_DECL__ __half2 __hisnan2(const __half2 a);
|
|
2113
2203
|
* \internal
|
2114
2204
|
* \req DEEPLEARN-SRM_REQ-95
|
2115
2205
|
* \endinternal
|
2116
|
-
* \param[in] a - half2. Is only being read.
|
2117
|
-
* \param[in] b - half2. Is only being read.
|
2206
|
+
* \param[in] a - half2. Is only being read.
|
2207
|
+
* \param[in] b - half2. Is only being read.
|
2118
2208
|
*
|
2119
2209
|
* \returns half2
|
2120
|
-
*
|
2210
|
+
* - The sum of vectors \p a and \p b.
|
2121
2211
|
* \internal
|
2122
2212
|
* \exception-guarantee no-throw guarantee
|
2123
2213
|
* \behavior reentrant, thread safe
|
@@ -2133,11 +2223,11 @@ __CUDA_FP16_DECL__ __half2 __hadd2(const __half2 a, const __half2 b);
|
|
2133
2223
|
* \internal
|
2134
2224
|
* \req DEEPLEARN-SRM_REQ-104
|
2135
2225
|
* \endinternal
|
2136
|
-
* \param[in] a - half2. Is only being read.
|
2137
|
-
* \param[in] b - half2. Is only being read.
|
2226
|
+
* \param[in] a - half2. Is only being read.
|
2227
|
+
* \param[in] b - half2. Is only being read.
|
2138
2228
|
*
|
2139
2229
|
* \returns half2
|
2140
|
-
*
|
2230
|
+
* - The subtraction of vector \p b from \p a.
|
2141
2231
|
* \internal
|
2142
2232
|
* \exception-guarantee no-throw guarantee
|
2143
2233
|
* \behavior reentrant, thread safe
|
@@ -2153,11 +2243,11 @@ __CUDA_FP16_DECL__ __half2 __hsub2(const __half2 a, const __half2 b);
|
|
2153
2243
|
* \internal
|
2154
2244
|
* \req DEEPLEARN-SRM_REQ-102
|
2155
2245
|
* \endinternal
|
2156
|
-
* \param[in] a - half2. Is only being read.
|
2157
|
-
* \param[in] b - half2. Is only being read.
|
2246
|
+
* \param[in] a - half2. Is only being read.
|
2247
|
+
* \param[in] b - half2. Is only being read.
|
2158
2248
|
*
|
2159
2249
|
* \returns half2
|
2160
|
-
*
|
2250
|
+
* - The result of elementwise multiplying the vectors \p a and \p b.
|
2161
2251
|
* \internal
|
2162
2252
|
* \exception-guarantee no-throw guarantee
|
2163
2253
|
* \behavior reentrant, thread safe
|
@@ -2166,6 +2256,68 @@ __CUDA_FP16_DECL__ __half2 __hsub2(const __half2 a, const __half2 b);
|
|
2166
2256
|
__CUDA_FP16_DECL__ __half2 __hmul2(const __half2 a, const __half2 b);
|
2167
2257
|
/**
|
2168
2258
|
* \ingroup CUDA_MATH__HALF2_ARITHMETIC
|
2259
|
+
* \brief Performs \p half2 vector addition in round-to-nearest-even mode.
|
2260
|
+
*
|
2261
|
+
* \details Performs \p half2 vector add of inputs \p a and \p b, in round-to-nearest-even
|
2262
|
+
* mode. Prevents floating-point contractions of mul+add into fma.
|
2263
|
+
* \internal
|
2264
|
+
* \req DEEPLEARN-SRM_REQ-95
|
2265
|
+
* \endinternal
|
2266
|
+
* \param[in] a - half2. Is only being read.
|
2267
|
+
* \param[in] b - half2. Is only being read.
|
2268
|
+
*
|
2269
|
+
* \returns half2
|
2270
|
+
* - The sum of vectors \p a and \p b.
|
2271
|
+
* \internal
|
2272
|
+
* \exception-guarantee no-throw guarantee
|
2273
|
+
* \behavior reentrant, thread safe
|
2274
|
+
* \endinternal
|
2275
|
+
*/
|
2276
|
+
__CUDA_FP16_DECL__ __half2 __hadd2_rn(const __half2 a, const __half2 b);
|
2277
|
+
/**
|
2278
|
+
* \ingroup CUDA_MATH__HALF2_ARITHMETIC
|
2279
|
+
* \brief Performs \p half2 vector subtraction in round-to-nearest-even mode.
|
2280
|
+
*
|
2281
|
+
* \details Subtracts \p half2 input vector \p b from input vector \p a in
|
2282
|
+
* round-to-nearest-even mode. Prevents floating-point contractions of mul+sub
|
2283
|
+
* into fma.
|
2284
|
+
* \internal
|
2285
|
+
* \req DEEPLEARN-SRM_REQ-104
|
2286
|
+
* \endinternal
|
2287
|
+
* \param[in] a - half2. Is only being read.
|
2288
|
+
* \param[in] b - half2. Is only being read.
|
2289
|
+
*
|
2290
|
+
* \returns half2
|
2291
|
+
* - The subtraction of vector \p b from \p a.
|
2292
|
+
* \internal
|
2293
|
+
* \exception-guarantee no-throw guarantee
|
2294
|
+
* \behavior reentrant, thread safe
|
2295
|
+
* \endinternal
|
2296
|
+
*/
|
2297
|
+
__CUDA_FP16_DECL__ __half2 __hsub2_rn(const __half2 a, const __half2 b);
|
2298
|
+
/**
|
2299
|
+
* \ingroup CUDA_MATH__HALF2_ARITHMETIC
|
2300
|
+
* \brief Performs \p half2 vector multiplication in round-to-nearest-even mode.
|
2301
|
+
*
|
2302
|
+
* \details Performs \p half2 vector multiplication of inputs \p a and \p b, in
|
2303
|
+
* round-to-nearest-even mode. Prevents floating-point contractions of
|
2304
|
+
* mul+add or sub into fma.
|
2305
|
+
* \internal
|
2306
|
+
* \req DEEPLEARN-SRM_REQ-102
|
2307
|
+
* \endinternal
|
2308
|
+
* \param[in] a - half2. Is only being read.
|
2309
|
+
* \param[in] b - half2. Is only being read.
|
2310
|
+
*
|
2311
|
+
* \returns half2
|
2312
|
+
* - The result of elementwise multiplying the vectors \p a and \p b.
|
2313
|
+
* \internal
|
2314
|
+
* \exception-guarantee no-throw guarantee
|
2315
|
+
* \behavior reentrant, thread safe
|
2316
|
+
* \endinternal
|
2317
|
+
*/
|
2318
|
+
__CUDA_FP16_DECL__ __half2 __hmul2_rn(const __half2 a, const __half2 b);
|
2319
|
+
/**
|
2320
|
+
* \ingroup CUDA_MATH__HALF2_ARITHMETIC
|
2169
2321
|
* \brief Performs \p half2 vector division in round-to-nearest-even mode.
|
2170
2322
|
*
|
2171
2323
|
* \details Divides \p half2 input vector \p a by input vector \p b in round-to-nearest
|
@@ -2173,11 +2325,11 @@ __CUDA_FP16_DECL__ __half2 __hmul2(const __half2 a, const __half2 b);
|
|
2173
2325
|
* \internal
|
2174
2326
|
* \req DEEPLEARN-SRM_REQ-103
|
2175
2327
|
* \endinternal
|
2176
|
-
* \param[in] a - half2. Is only being read.
|
2177
|
-
* \param[in] b - half2. Is only being read.
|
2328
|
+
* \param[in] a - half2. Is only being read.
|
2329
|
+
* \param[in] b - half2. Is only being read.
|
2178
2330
|
*
|
2179
2331
|
* \returns half2
|
2180
|
-
*
|
2332
|
+
* - The elementwise division of \p a by \p b.
|
2181
2333
|
* \internal
|
2182
2334
|
* \exception-guarantee no-throw guarantee
|
2183
2335
|
* \behavior reentrant, thread safe
|
@@ -2191,10 +2343,10 @@ __CUDA_FP16_DECL__ __half2 __h2div(const __half2 a, const __half2 b);
|
|
2191
2343
|
*
|
2192
2344
|
* \details Calculates the absolute value of both halves of the input \p half2 number and
|
2193
2345
|
* returns the result.
|
2194
|
-
* \param[in] a - half2. Is only being read.
|
2346
|
+
* \param[in] a - half2. Is only being read.
|
2195
2347
|
*
|
2196
2348
|
* \returns half2
|
2197
|
-
*
|
2349
|
+
* - Returns \p a with the absolute value of both halves.
|
2198
2350
|
* \internal
|
2199
2351
|
* \exception-guarantee no-throw guarantee
|
2200
2352
|
* \behavior reentrant, thread safe
|
@@ -2209,11 +2361,11 @@ __CUDA_FP16_DECL__ __half2 __habs2(const __half2 a);
|
|
2209
2361
|
* \details Performs \p half2 vector add of inputs \p a and \p b, in round-to-nearest
|
2210
2362
|
* mode, and clamps the results to range [0.0, 1.0]. NaN results are flushed to
|
2211
2363
|
* +0.0.
|
2212
|
-
* \param[in] a - half2. Is only being read.
|
2213
|
-
* \param[in] b - half2. Is only being read.
|
2364
|
+
* \param[in] a - half2. Is only being read.
|
2365
|
+
* \param[in] b - half2. Is only being read.
|
2214
2366
|
*
|
2215
2367
|
* \returns half2
|
2216
|
-
*
|
2368
|
+
* - The sum of \p a and \p b, with respect to saturation.
|
2217
2369
|
* \internal
|
2218
2370
|
* \exception-guarantee no-throw guarantee
|
2219
2371
|
* \behavior reentrant, thread safe
|
@@ -2228,11 +2380,11 @@ __CUDA_FP16_DECL__ __half2 __hadd2_sat(const __half2 a, const __half2 b);
|
|
2228
2380
|
* \details Subtracts \p half2 input vector \p b from input vector \p a in
|
2229
2381
|
* round-to-nearest-even mode, and clamps the results to range [0.0, 1.0]. NaN
|
2230
2382
|
* results are flushed to +0.0.
|
2231
|
-
* \param[in] a - half2. Is only being read.
|
2232
|
-
* \param[in] b - half2. Is only being read.
|
2383
|
+
* \param[in] a - half2. Is only being read.
|
2384
|
+
* \param[in] b - half2. Is only being read.
|
2233
2385
|
*
|
2234
2386
|
* \returns half2
|
2235
|
-
*
|
2387
|
+
* - The subtraction of vector \p b from \p a, with respect to saturation.
|
2236
2388
|
* \internal
|
2237
2389
|
* \exception-guarantee no-throw guarantee
|
2238
2390
|
* \behavior reentrant, thread safe
|
@@ -2247,12 +2399,12 @@ __CUDA_FP16_DECL__ __half2 __hsub2_sat(const __half2 a, const __half2 b);
|
|
2247
2399
|
* \details Performs \p half2 vector multiplication of inputs \p a and \p b, in
|
2248
2400
|
* round-to-nearest-even mode, and clamps the results to range [0.0, 1.0]. NaN
|
2249
2401
|
* results are flushed to +0.0.
|
2250
|
-
* \param[in] a - half2. Is only being read.
|
2251
|
-
* \param[in] b - half2. Is only being read.
|
2402
|
+
* \param[in] a - half2. Is only being read.
|
2403
|
+
* \param[in] b - half2. Is only being read.
|
2252
2404
|
*
|
2253
2405
|
* \returns half2
|
2254
|
-
*
|
2255
|
-
* with respect to saturation.
|
2406
|
+
* - The result of elementwise multiplication of vectors \p a and \p b,
|
2407
|
+
* with respect to saturation.
|
2256
2408
|
* \internal
|
2257
2409
|
* \exception-guarantee no-throw guarantee
|
2258
2410
|
* \behavior reentrant, thread safe
|
@@ -2270,12 +2422,12 @@ __CUDA_FP16_DECL__ __half2 __hmul2_sat(const __half2 a, const __half2 b);
|
|
2270
2422
|
* \internal
|
2271
2423
|
* \req DEEPLEARN-SRM_REQ-105
|
2272
2424
|
* \endinternal
|
2273
|
-
* \param[in] a - half2. Is only being read.
|
2274
|
-
* \param[in] b - half2. Is only being read.
|
2275
|
-
* \param[in] c - half2. Is only being read.
|
2425
|
+
* \param[in] a - half2. Is only being read.
|
2426
|
+
* \param[in] b - half2. Is only being read.
|
2427
|
+
* \param[in] c - half2. Is only being read.
|
2276
2428
|
*
|
2277
2429
|
* \returns half2
|
2278
|
-
*
|
2430
|
+
* - The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c.
|
2279
2431
|
* \internal
|
2280
2432
|
* \exception-guarantee no-throw guarantee
|
2281
2433
|
* \behavior reentrant, thread safe
|
@@ -2291,13 +2443,13 @@ __CUDA_FP16_DECL__ __half2 __hfma2(const __half2 a, const __half2 b, const __hal
|
|
2291
2443
|
* then performs a \p half2 vector add of the result with \p c,
|
2292
2444
|
* rounding the result once in round-to-nearest-even mode, and clamps the
|
2293
2445
|
* results to range [0.0, 1.0]. NaN results are flushed to +0.0.
|
2294
|
-
* \param[in] a - half2. Is only being read.
|
2295
|
-
* \param[in] b - half2. Is only being read.
|
2296
|
-
* \param[in] c - half2. Is only being read.
|
2446
|
+
* \param[in] a - half2. Is only being read.
|
2447
|
+
* \param[in] b - half2. Is only being read.
|
2448
|
+
* \param[in] c - half2. Is only being read.
|
2297
2449
|
*
|
2298
2450
|
* \returns half2
|
2299
|
-
*
|
2300
|
-
* with respect to saturation.
|
2451
|
+
* - The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c,
|
2452
|
+
* with respect to saturation.
|
2301
2453
|
* \internal
|
2302
2454
|
* \exception-guarantee no-throw guarantee
|
2303
2455
|
* \behavior reentrant, thread safe
|
@@ -2313,10 +2465,10 @@ __CUDA_FP16_DECL__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const _
|
|
2313
2465
|
* \internal
|
2314
2466
|
* \req DEEPLEARN-SRM_REQ-101
|
2315
2467
|
* \endinternal
|
2316
|
-
* \param[in] a - half2. Is only being read.
|
2468
|
+
* \param[in] a - half2. Is only being read.
|
2317
2469
|
*
|
2318
2470
|
* \returns half2
|
2319
|
-
*
|
2471
|
+
* - Returns \p a with both halves negated.
|
2320
2472
|
* \internal
|
2321
2473
|
* \exception-guarantee no-throw guarantee
|
2322
2474
|
* \behavior reentrant, thread safe
|
@@ -2328,10 +2480,10 @@ __CUDA_FP16_DECL__ __half2 __hneg2(const __half2 a);
|
|
2328
2480
|
* \brief Calculates the absolute value of input \p half number and returns the result.
|
2329
2481
|
*
|
2330
2482
|
* \details Calculates the absolute value of input \p half number and returns the result.
|
2331
|
-
* \param[in] a - half. Is only being read.
|
2483
|
+
* \param[in] a - half. Is only being read.
|
2332
2484
|
*
|
2333
2485
|
* \returns half
|
2334
|
-
*
|
2486
|
+
* - The absolute value of \p a.
|
2335
2487
|
* \internal
|
2336
2488
|
* \exception-guarantee no-throw guarantee
|
2337
2489
|
* \behavior reentrant, thread safe
|
@@ -2347,11 +2499,11 @@ __CUDA_FP16_DECL__ __half __habs(const __half a);
|
|
2347
2499
|
* \internal
|
2348
2500
|
* \req DEEPLEARN-SRM_REQ-94
|
2349
2501
|
* \endinternal
|
2350
|
-
* \param[in] a - half. Is only being read.
|
2351
|
-
* \param[in] b - half. Is only being read.
|
2502
|
+
* \param[in] a - half. Is only being read.
|
2503
|
+
* \param[in] b - half. Is only being read.
|
2352
2504
|
*
|
2353
2505
|
* \returns half
|
2354
|
-
*
|
2506
|
+
* - The sum of \p a and \p b.
|
2355
2507
|
* \internal
|
2356
2508
|
* \exception-guarantee no-throw guarantee
|
2357
2509
|
* \behavior reentrant, thread safe
|
@@ -2367,11 +2519,11 @@ __CUDA_FP16_DECL__ __half __hadd(const __half a, const __half b);
|
|
2367
2519
|
* \internal
|
2368
2520
|
* \req DEEPLEARN-SRM_REQ-97
|
2369
2521
|
* \endinternal
|
2370
|
-
* \param[in] a - half. Is only being read.
|
2371
|
-
* \param[in] b - half. Is only being read.
|
2522
|
+
* \param[in] a - half. Is only being read.
|
2523
|
+
* \param[in] b - half. Is only being read.
|
2372
2524
|
*
|
2373
2525
|
* \returns half
|
2374
|
-
*
|
2526
|
+
* - The result of subtracting \p b from \p a.
|
2375
2527
|
* \internal
|
2376
2528
|
* \exception-guarantee no-throw guarantee
|
2377
2529
|
* \behavior reentrant, thread safe
|
@@ -2387,27 +2539,83 @@ __CUDA_FP16_DECL__ __half __hsub(const __half a, const __half b);
|
|
2387
2539
|
* \internal
|
2388
2540
|
* \req DEEPLEARN-SRM_REQ-99
|
2389
2541
|
* \endinternal
|
2390
|
-
* \param[in] a - half. Is only being read.
|
2391
|
-
* \param[in] b - half. Is only being read.
|
2542
|
+
* \param[in] a - half. Is only being read.
|
2543
|
+
* \param[in] b - half. Is only being read.
|
2392
2544
|
*
|
2393
2545
|
* \returns half
|
2394
|
-
*
|
2546
|
+
* - The result of multiplying \p a and \p b.
|
2395
2547
|
*/
|
2396
2548
|
__CUDA_FP16_DECL__ __half __hmul(const __half a, const __half b);
|
2397
2549
|
/**
|
2398
2550
|
* \ingroup CUDA_MATH__HALF_ARITHMETIC
|
2551
|
+
* \brief Performs \p half addition in round-to-nearest-even mode.
|
2552
|
+
*
|
2553
|
+
* \details Performs \p half addition of inputs \p a and \p b, in round-to-nearest-even
|
2554
|
+
* mode. Prevents floating-point contractions of mul+add into fma.
|
2555
|
+
* \internal
|
2556
|
+
* \req DEEPLEARN-SRM_REQ-94
|
2557
|
+
* \endinternal
|
2558
|
+
* \param[in] a - half. Is only being read.
|
2559
|
+
* \param[in] b - half. Is only being read.
|
2560
|
+
*
|
2561
|
+
* \returns half
|
2562
|
+
* - The sum of \p a and \p b.
|
2563
|
+
* \internal
|
2564
|
+
* \exception-guarantee no-throw guarantee
|
2565
|
+
* \behavior reentrant, thread safe
|
2566
|
+
* \endinternal
|
2567
|
+
*/
|
2568
|
+
__CUDA_FP16_DECL__ __half __hadd_rn(const __half a, const __half b);
|
2569
|
+
/**
|
2570
|
+
* \ingroup CUDA_MATH__HALF_ARITHMETIC
|
2571
|
+
* \brief Performs \p half subtraction in round-to-nearest-even mode.
|
2572
|
+
*
|
2573
|
+
* \details Subtracts \p half input \p b from input \p a in round-to-nearest-even
|
2574
|
+
* mode. Prevents floating-point contractions of mul+sub into fma.
|
2575
|
+
* \internal
|
2576
|
+
* \req DEEPLEARN-SRM_REQ-97
|
2577
|
+
* \endinternal
|
2578
|
+
* \param[in] a - half. Is only being read.
|
2579
|
+
* \param[in] b - half. Is only being read.
|
2580
|
+
*
|
2581
|
+
* \returns half
|
2582
|
+
* - The result of subtracting \p b from \p a.
|
2583
|
+
* \internal
|
2584
|
+
* \exception-guarantee no-throw guarantee
|
2585
|
+
* \behavior reentrant, thread safe
|
2586
|
+
* \endinternal
|
2587
|
+
*/
|
2588
|
+
__CUDA_FP16_DECL__ __half __hsub_rn(const __half a, const __half b);
|
2589
|
+
/**
|
2590
|
+
* \ingroup CUDA_MATH__HALF_ARITHMETIC
|
2591
|
+
* \brief Performs \p half multiplication in round-to-nearest-even mode.
|
2592
|
+
*
|
2593
|
+
* \details Performs \p half multiplication of inputs \p a and \p b, in round-to-nearest-even
|
2594
|
+
* mode. Prevents floating-point contractions of mul+add or sub into fma.
|
2595
|
+
* \internal
|
2596
|
+
* \req DEEPLEARN-SRM_REQ-99
|
2597
|
+
* \endinternal
|
2598
|
+
* \param[in] a - half. Is only being read.
|
2599
|
+
* \param[in] b - half. Is only being read.
|
2600
|
+
*
|
2601
|
+
* \returns half
|
2602
|
+
* - The result of multiplying \p a and \p b.
|
2603
|
+
*/
|
2604
|
+
__CUDA_FP16_DECL__ __half __hmul_rn(const __half a, const __half b);
|
2605
|
+
/**
|
2606
|
+
* \ingroup CUDA_MATH__HALF_ARITHMETIC
|
2399
2607
|
* \brief Performs \p half division in round-to-nearest-even mode.
|
2400
|
-
*
|
2608
|
+
*
|
2401
2609
|
* \details Divides \p half input \p a by input \p b in round-to-nearest
|
2402
2610
|
* mode.
|
2403
2611
|
* \internal
|
2404
2612
|
* \req DEEPLEARN-SRM_REQ-98
|
2405
2613
|
* \endinternal
|
2406
|
-
* \param[in] a - half. Is only being read.
|
2407
|
-
* \param[in] b - half. Is only being read.
|
2408
|
-
*
|
2614
|
+
* \param[in] a - half. Is only being read.
|
2615
|
+
* \param[in] b - half. Is only being read.
|
2616
|
+
*
|
2409
2617
|
* \returns half
|
2410
|
-
*
|
2618
|
+
* - The result of dividing \p a by \p b.
|
2411
2619
|
* \internal
|
2412
2620
|
* \exception-guarantee no-throw guarantee
|
2413
2621
|
* \behavior reentrant, thread safe
|
@@ -2421,11 +2629,11 @@ __CUDA_FP16_DECL__ __half __hdiv(const __half a, const __half b);
|
|
2421
2629
|
*
|
2422
2630
|
* \details Performs \p half add of inputs \p a and \p b, in round-to-nearest-even mode,
|
2423
2631
|
* and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.
|
2424
|
-
* \param[in] a - half. Is only being read.
|
2425
|
-
* \param[in] b - half. Is only being read.
|
2632
|
+
* \param[in] a - half. Is only being read.
|
2633
|
+
* \param[in] b - half. Is only being read.
|
2426
2634
|
*
|
2427
2635
|
* \returns half
|
2428
|
-
*
|
2636
|
+
* - The sum of \p a and \p b, with respect to saturation.
|
2429
2637
|
* \internal
|
2430
2638
|
* \exception-guarantee no-throw guarantee
|
2431
2639
|
* \behavior reentrant, thread safe
|
@@ -2440,11 +2648,11 @@ __CUDA_FP16_DECL__ __half __hadd_sat(const __half a, const __half b);
|
|
2440
2648
|
* \details Subtracts \p half input \p b from input \p a in round-to-nearest
|
2441
2649
|
* mode,
|
2442
2650
|
* and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.
|
2443
|
-
* \param[in] a - half. Is only being read.
|
2444
|
-
* \param[in] b - half. Is only being read.
|
2651
|
+
* \param[in] a - half. Is only being read.
|
2652
|
+
* \param[in] b - half. Is only being read.
|
2445
2653
|
*
|
2446
2654
|
* \returns half
|
2447
|
-
*
|
2655
|
+
* - The result of subtraction of \p b from \p a, with respect to saturation.
|
2448
2656
|
* \internal
|
2449
2657
|
* \exception-guarantee no-throw guarantee
|
2450
2658
|
* \behavior reentrant, thread safe
|
@@ -2459,11 +2667,11 @@ __CUDA_FP16_DECL__ __half __hsub_sat(const __half a, const __half b);
|
|
2459
2667
|
* \details Performs \p half multiplication of inputs \p a and \p b, in round-to-nearest
|
2460
2668
|
* mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to
|
2461
2669
|
* +0.0.
|
2462
|
-
* \param[in] a - half. Is only being read.
|
2463
|
-
* \param[in] b - half. Is only being read.
|
2670
|
+
* \param[in] a - half. Is only being read.
|
2671
|
+
* \param[in] b - half. Is only being read.
|
2464
2672
|
*
|
2465
2673
|
* \returns half
|
2466
|
-
*
|
2674
|
+
* - The result of multiplying \p a and \p b, with respect to saturation.
|
2467
2675
|
* \internal
|
2468
2676
|
* \exception-guarantee no-throw guarantee
|
2469
2677
|
* \behavior reentrant, thread safe
|
@@ -2480,13 +2688,13 @@ __CUDA_FP16_DECL__ __half __hmul_sat(const __half a, const __half b);
|
|
2480
2688
|
* \internal
|
2481
2689
|
* \req DEEPLEARN-SRM_REQ-96
|
2482
2690
|
* \endinternal
|
2483
|
-
* \param[in] a - half. Is only being read.
|
2484
|
-
* \param[in] b - half. Is only being read.
|
2485
|
-
* \param[in] c - half. Is only being read.
|
2691
|
+
* \param[in] a - half. Is only being read.
|
2692
|
+
* \param[in] b - half. Is only being read.
|
2693
|
+
* \param[in] c - half. Is only being read.
|
2486
2694
|
*
|
2487
2695
|
* \returns half
|
2488
|
-
*
|
2489
|
-
* a, \p b, and \p c.
|
2696
|
+
* - The result of fused multiply-add operation on \p
|
2697
|
+
* a, \p b, and \p c.
|
2490
2698
|
* \internal
|
2491
2699
|
* \exception-guarantee no-throw guarantee
|
2492
2700
|
* \behavior reentrant, thread safe
|
@@ -2502,13 +2710,13 @@ __CUDA_FP16_DECL__ __half __hfma(const __half a, const __half b, const __half c)
|
|
2502
2710
|
* then performs a \p half add of the result with \p c,
|
2503
2711
|
* rounding the result once in round-to-nearest-even mode, and clamps the result
|
2504
2712
|
* to range [0.0, 1.0]. NaN results are flushed to +0.0.
|
2505
|
-
* \param[in] a - half. Is only being read.
|
2506
|
-
* \param[in] b - half. Is only being read.
|
2507
|
-
* \param[in] c - half. Is only being read.
|
2713
|
+
* \param[in] a - half. Is only being read.
|
2714
|
+
* \param[in] b - half. Is only being read.
|
2715
|
+
* \param[in] c - half. Is only being read.
|
2508
2716
|
*
|
2509
2717
|
* \returns half
|
2510
|
-
*
|
2511
|
-
* a, \p b, and \p c, with respect to saturation.
|
2718
|
+
* - The result of fused multiply-add operation on \p
|
2719
|
+
* a, \p b, and \p c, with respect to saturation.
|
2512
2720
|
* \internal
|
2513
2721
|
* \exception-guarantee no-throw guarantee
|
2514
2722
|
* \behavior reentrant, thread safe
|
@@ -2523,10 +2731,10 @@ __CUDA_FP16_DECL__ __half __hfma_sat(const __half a, const __half b, const __hal
|
|
2523
2731
|
* \internal
|
2524
2732
|
* \req DEEPLEARN-SRM_REQ-100
|
2525
2733
|
* \endinternal
|
2526
|
-
* \param[in] a - half. Is only being read.
|
2734
|
+
* \param[in] a - half. Is only being read.
|
2527
2735
|
*
|
2528
2736
|
* \returns half
|
2529
|
-
*
|
2737
|
+
* - The negation of \p a.
|
2530
2738
|
* \internal
|
2531
2739
|
* \exception-guarantee no-throw guarantee
|
2532
2740
|
* \behavior reentrant, thread safe
|
@@ -2542,13 +2750,13 @@ __CUDA_FP16_DECL__ __half __hneg(const __half a);
|
|
2542
2750
|
* The bool result is set to true only if both \p half if-equal comparisons
|
2543
2751
|
* evaluate to true, or false otherwise.
|
2544
2752
|
* NaN inputs generate false results.
|
2545
|
-
* \param[in] a - half2. Is only being read.
|
2546
|
-
* \param[in] b - half2. Is only being read.
|
2753
|
+
* \param[in] a - half2. Is only being read.
|
2754
|
+
* \param[in] b - half2. Is only being read.
|
2547
2755
|
*
|
2548
2756
|
* \returns bool
|
2549
|
-
*
|
2757
|
+
* - true if both \p half results of if-equal comparison
|
2550
2758
|
* of vectors \p a and \p b are true;
|
2551
|
-
*
|
2759
|
+
* - false otherwise.
|
2552
2760
|
* \internal
|
2553
2761
|
* \exception-guarantee no-throw guarantee
|
2554
2762
|
* \behavior reentrant, thread safe
|
@@ -2564,13 +2772,13 @@ __CUDA_FP16_DECL__ bool __hbeq2(const __half2 a, const __half2 b);
|
|
2564
2772
|
* The bool result is set to true only if both \p half not-equal comparisons
|
2565
2773
|
* evaluate to true, or false otherwise.
|
2566
2774
|
* NaN inputs generate false results.
|
2567
|
-
* \param[in] a - half2. Is only being read.
|
2568
|
-
* \param[in] b - half2. Is only being read.
|
2775
|
+
* \param[in] a - half2. Is only being read.
|
2776
|
+
* \param[in] b - half2. Is only being read.
|
2569
2777
|
*
|
2570
2778
|
* \returns bool
|
2571
|
-
*
|
2572
|
-
* of vectors \p a and \p b are true,
|
2573
|
-
*
|
2779
|
+
* - true if both \p half results of not-equal comparison
|
2780
|
+
* of vectors \p a and \p b are true;
|
2781
|
+
* - false otherwise.
|
2574
2782
|
* \internal
|
2575
2783
|
* \exception-guarantee no-throw guarantee
|
2576
2784
|
* \behavior reentrant, thread safe
|
@@ -2586,13 +2794,13 @@ __CUDA_FP16_DECL__ bool __hbne2(const __half2 a, const __half2 b);
|
|
2586
2794
|
* The bool result is set to true only if both \p half less-equal comparisons
|
2587
2795
|
* evaluate to true, or false otherwise.
|
2588
2796
|
* NaN inputs generate false results.
|
2589
|
-
* \param[in] a - half2. Is only being read.
|
2590
|
-
* \param[in] b - half2. Is only being read.
|
2797
|
+
* \param[in] a - half2. Is only being read.
|
2798
|
+
* \param[in] b - half2. Is only being read.
|
2591
2799
|
*
|
2592
2800
|
* \returns bool
|
2593
|
-
*
|
2594
|
-
* of vectors \p a and \p b are true;
|
2595
|
-
*
|
2801
|
+
* - true if both \p half results of less-equal comparison
|
2802
|
+
* of vectors \p a and \p b are true;
|
2803
|
+
* - false otherwise.
|
2596
2804
|
* \internal
|
2597
2805
|
* \exception-guarantee no-throw guarantee
|
2598
2806
|
* \behavior reentrant, thread safe
|
@@ -2608,13 +2816,13 @@ __CUDA_FP16_DECL__ bool __hble2(const __half2 a, const __half2 b);
|
|
2608
2816
|
* The bool result is set to true only if both \p half greater-equal comparisons
|
2609
2817
|
* evaluate to true, or false otherwise.
|
2610
2818
|
* NaN inputs generate false results.
|
2611
|
-
* \param[in] a - half2. Is only being read.
|
2612
|
-
* \param[in] b - half2. Is only being read.
|
2819
|
+
* \param[in] a - half2. Is only being read.
|
2820
|
+
* \param[in] b - half2. Is only being read.
|
2613
2821
|
*
|
2614
2822
|
* \returns bool
|
2615
|
-
*
|
2616
|
-
* comparison of vectors \p a and \p b are true;
|
2617
|
-
*
|
2823
|
+
* - true if both \p half results of greater-equal
|
2824
|
+
* comparison of vectors \p a and \p b are true;
|
2825
|
+
* - false otherwise.
|
2618
2826
|
* \internal
|
2619
2827
|
* \exception-guarantee no-throw guarantee
|
2620
2828
|
* \behavior reentrant, thread safe
|
@@ -2630,13 +2838,13 @@ __CUDA_FP16_DECL__ bool __hbge2(const __half2 a, const __half2 b);
|
|
2630
2838
|
* The bool result is set to true only if both \p half less-than comparisons
|
2631
2839
|
* evaluate to true, or false otherwise.
|
2632
2840
|
* NaN inputs generate false results.
|
2633
|
-
* \param[in] a - half2. Is only being read.
|
2634
|
-
* \param[in] b - half2. Is only being read.
|
2841
|
+
* \param[in] a - half2. Is only being read.
|
2842
|
+
* \param[in] b - half2. Is only being read.
|
2635
2843
|
*
|
2636
2844
|
* \returns bool
|
2637
|
-
*
|
2638
|
-
* of vectors \p a and \p b are true;
|
2639
|
-
*
|
2845
|
+
* - true if both \p half results of less-than comparison
|
2846
|
+
* of vectors \p a and \p b are true;
|
2847
|
+
* - false otherwise.
|
2640
2848
|
* \internal
|
2641
2849
|
* \exception-guarantee no-throw guarantee
|
2642
2850
|
* \behavior reentrant, thread safe
|
@@ -2652,13 +2860,13 @@ __CUDA_FP16_DECL__ bool __hblt2(const __half2 a, const __half2 b);
|
|
2652
2860
|
* The bool result is set to true only if both \p half greater-than comparisons
|
2653
2861
|
* evaluate to true, or false otherwise.
|
2654
2862
|
* NaN inputs generate false results.
|
2655
|
-
* \param[in] a - half2. Is only being read.
|
2656
|
-
* \param[in] b - half2. Is only being read.
|
2657
|
-
*
|
2658
|
-
* \returns bool
|
2659
|
-
*
|
2660
|
-
* comparison of vectors \p a and \p b are true;
|
2661
|
-
*
|
2863
|
+
* \param[in] a - half2. Is only being read.
|
2864
|
+
* \param[in] b - half2. Is only being read.
|
2865
|
+
*
|
2866
|
+
* \returns bool
|
2867
|
+
* - true if both \p half results of greater-than
|
2868
|
+
* comparison of vectors \p a and \p b are true;
|
2869
|
+
* - false otherwise.
|
2662
2870
|
* \internal
|
2663
2871
|
* \exception-guarantee no-throw guarantee
|
2664
2872
|
* \behavior reentrant, thread safe
|
@@ -2674,13 +2882,13 @@ __CUDA_FP16_DECL__ bool __hbgt2(const __half2 a, const __half2 b);
|
|
2674
2882
|
* The bool result is set to true only if both \p half if-equal comparisons
|
2675
2883
|
* evaluate to true, or false otherwise.
|
2676
2884
|
* NaN inputs generate true results.
|
2677
|
-
* \param[in] a - half2. Is only being read.
|
2678
|
-
* \param[in] b - half2. Is only being read.
|
2885
|
+
* \param[in] a - half2. Is only being read.
|
2886
|
+
* \param[in] b - half2. Is only being read.
|
2679
2887
|
*
|
2680
2888
|
* \returns bool
|
2681
|
-
*
|
2682
|
-
* comparison of vectors \p a and \p b are true;
|
2683
|
-
*
|
2889
|
+
* - true if both \p half results of unordered if-equal
|
2890
|
+
* comparison of vectors \p a and \p b are true;
|
2891
|
+
* - false otherwise.
|
2684
2892
|
* \internal
|
2685
2893
|
* \exception-guarantee no-throw guarantee
|
2686
2894
|
* \behavior reentrant, thread safe
|
@@ -2696,13 +2904,13 @@ __CUDA_FP16_DECL__ bool __hbequ2(const __half2 a, const __half2 b);
|
|
2696
2904
|
* The bool result is set to true only if both \p half not-equal comparisons
|
2697
2905
|
* evaluate to true, or false otherwise.
|
2698
2906
|
* NaN inputs generate true results.
|
2699
|
-
* \param[in] a - half2. Is only being read.
|
2700
|
-
* \param[in] b - half2. Is only being read.
|
2907
|
+
* \param[in] a - half2. Is only being read.
|
2908
|
+
* \param[in] b - half2. Is only being read.
|
2701
2909
|
*
|
2702
2910
|
* \returns bool
|
2703
|
-
*
|
2911
|
+
* - true if both \p half results of unordered not-equal
|
2704
2912
|
* comparison of vectors \p a and \p b are true;
|
2705
|
-
*
|
2913
|
+
* - false otherwise.
|
2706
2914
|
* \internal
|
2707
2915
|
* \exception-guarantee no-throw guarantee
|
2708
2916
|
* \behavior reentrant, thread safe
|
@@ -2718,13 +2926,13 @@ __CUDA_FP16_DECL__ bool __hbneu2(const __half2 a, const __half2 b);
|
|
2718
2926
|
* The bool result is set to true only if both \p half less-equal comparisons
|
2719
2927
|
* evaluate to true, or false otherwise.
|
2720
2928
|
* NaN inputs generate true results.
|
2721
|
-
* \param[in] a - half2. Is only being read.
|
2722
|
-
* \param[in] b - half2. Is only being read.
|
2929
|
+
* \param[in] a - half2. Is only being read.
|
2930
|
+
* \param[in] b - half2. Is only being read.
|
2723
2931
|
*
|
2724
2932
|
* \returns bool
|
2725
|
-
*
|
2726
|
-
* comparison of vectors \p a and \p b are true;
|
2727
|
-
*
|
2933
|
+
* - true if both \p half results of unordered less-equal
|
2934
|
+
* comparison of vectors \p a and \p b are true;
|
2935
|
+
* - false otherwise.
|
2728
2936
|
* \internal
|
2729
2937
|
* \exception-guarantee no-throw guarantee
|
2730
2938
|
* \behavior reentrant, thread safe
|
@@ -2741,13 +2949,13 @@ __CUDA_FP16_DECL__ bool __hbleu2(const __half2 a, const __half2 b);
|
|
2741
2949
|
* The bool result is set to true only if both \p half greater-equal comparisons
|
2742
2950
|
* evaluate to true, or false otherwise.
|
2743
2951
|
* NaN inputs generate true results.
|
2744
|
-
* \param[in] a - half2. Is only being read.
|
2745
|
-
* \param[in] b - half2. Is only being read.
|
2952
|
+
* \param[in] a - half2. Is only being read.
|
2953
|
+
* \param[in] b - half2. Is only being read.
|
2746
2954
|
*
|
2747
2955
|
* \returns bool
|
2748
|
-
*
|
2749
|
-
* greater-equal comparison of vectors \p a and \p b are true;
|
2750
|
-
*
|
2956
|
+
* - true if both \p half results of unordered
|
2957
|
+
* greater-equal comparison of vectors \p a and \p b are true;
|
2958
|
+
* - false otherwise.
|
2751
2959
|
* \internal
|
2752
2960
|
* \exception-guarantee no-throw guarantee
|
2753
2961
|
* \behavior reentrant, thread safe
|
@@ -2763,13 +2971,13 @@ __CUDA_FP16_DECL__ bool __hbgeu2(const __half2 a, const __half2 b);
|
|
2763
2971
|
* The bool result is set to true only if both \p half less-than comparisons
|
2764
2972
|
* evaluate to true, or false otherwise.
|
2765
2973
|
* NaN inputs generate true results.
|
2766
|
-
* \param[in] a - half2. Is only being read.
|
2767
|
-
* \param[in] b - half2. Is only being read.
|
2974
|
+
* \param[in] a - half2. Is only being read.
|
2975
|
+
* \param[in] b - half2. Is only being read.
|
2768
2976
|
*
|
2769
2977
|
* \returns bool
|
2770
|
-
*
|
2771
|
-
* vectors \p a and \p b are true;
|
2772
|
-
*
|
2978
|
+
* - true if both \p half results of unordered less-than comparison of
|
2979
|
+
* vectors \p a and \p b are true;
|
2980
|
+
* - false otherwise.
|
2773
2981
|
* \internal
|
2774
2982
|
* \exception-guarantee no-throw guarantee
|
2775
2983
|
* \behavior reentrant, thread safe
|
@@ -2786,13 +2994,13 @@ __CUDA_FP16_DECL__ bool __hbltu2(const __half2 a, const __half2 b);
|
|
2786
2994
|
* The bool result is set to true only if both \p half greater-than comparisons
|
2787
2995
|
* evaluate to true, or false otherwise.
|
2788
2996
|
* NaN inputs generate true results.
|
2789
|
-
* \param[in] a - half2. Is only being read.
|
2790
|
-
* \param[in] b - half2. Is only being read.
|
2997
|
+
* \param[in] a - half2. Is only being read.
|
2998
|
+
* \param[in] b - half2. Is only being read.
|
2791
2999
|
*
|
2792
3000
|
* \returns bool
|
2793
|
-
*
|
3001
|
+
* - true if both \p half results of unordered
|
2794
3002
|
* greater-than comparison of vectors \p a and \p b are true;
|
2795
|
-
*
|
3003
|
+
* - false otherwise.
|
2796
3004
|
* \internal
|
2797
3005
|
* \exception-guarantee no-throw guarantee
|
2798
3006
|
* \behavior reentrant, thread safe
|
@@ -2805,11 +3013,11 @@ __CUDA_FP16_DECL__ bool __hbgtu2(const __half2 a, const __half2 b);
|
|
2805
3013
|
*
|
2806
3014
|
* \details Performs \p half if-equal comparison of inputs \p a and \p b.
|
2807
3015
|
* NaN inputs generate false results.
|
2808
|
-
* \param[in] a - half. Is only being read.
|
2809
|
-
* \param[in] b - half. Is only being read.
|
3016
|
+
* \param[in] a - half. Is only being read.
|
3017
|
+
* \param[in] b - half. Is only being read.
|
2810
3018
|
*
|
2811
3019
|
* \returns bool
|
2812
|
-
*
|
3020
|
+
* - The boolean result of if-equal comparison of \p a and \p b.
|
2813
3021
|
* \internal
|
2814
3022
|
* \exception-guarantee no-throw guarantee
|
2815
3023
|
* \behavior reentrant, thread safe
|
@@ -2822,11 +3030,11 @@ __CUDA_FP16_DECL__ bool __heq(const __half a, const __half b);
|
|
2822
3030
|
*
|
2823
3031
|
* \details Performs \p half not-equal comparison of inputs \p a and \p b.
|
2824
3032
|
* NaN inputs generate false results.
|
2825
|
-
* \param[in] a - half. Is only being read.
|
2826
|
-
* \param[in] b - half. Is only being read.
|
3033
|
+
* \param[in] a - half. Is only being read.
|
3034
|
+
* \param[in] b - half. Is only being read.
|
2827
3035
|
*
|
2828
3036
|
* \returns bool
|
2829
|
-
*
|
3037
|
+
* - The boolean result of not-equal comparison of \p a and \p b.
|
2830
3038
|
* \internal
|
2831
3039
|
* \exception-guarantee no-throw guarantee
|
2832
3040
|
* \behavior reentrant, thread safe
|
@@ -2839,11 +3047,11 @@ __CUDA_FP16_DECL__ bool __hne(const __half a, const __half b);
|
|
2839
3047
|
*
|
2840
3048
|
* \details Performs \p half less-equal comparison of inputs \p a and \p b.
|
2841
3049
|
* NaN inputs generate false results.
|
2842
|
-
* \param[in] a - half. Is only being read.
|
2843
|
-
* \param[in] b - half. Is only being read.
|
3050
|
+
* \param[in] a - half. Is only being read.
|
3051
|
+
* \param[in] b - half. Is only being read.
|
2844
3052
|
*
|
2845
3053
|
* \returns bool
|
2846
|
-
*
|
3054
|
+
* - The boolean result of less-equal comparison of \p a and \p b.
|
2847
3055
|
* \internal
|
2848
3056
|
* \exception-guarantee no-throw guarantee
|
2849
3057
|
* \behavior reentrant, thread safe
|
@@ -2856,11 +3064,11 @@ __CUDA_FP16_DECL__ bool __hle(const __half a, const __half b);
|
|
2856
3064
|
*
|
2857
3065
|
* \details Performs \p half greater-equal comparison of inputs \p a and \p b.
|
2858
3066
|
* NaN inputs generate false results.
|
2859
|
-
* \param[in] a - half. Is only being read.
|
2860
|
-
* \param[in] b - half. Is only being read.
|
3067
|
+
* \param[in] a - half. Is only being read.
|
3068
|
+
* \param[in] b - half. Is only being read.
|
2861
3069
|
*
|
2862
3070
|
* \returns bool
|
2863
|
-
*
|
3071
|
+
* - The boolean result of greater-equal comparison of \p a and \p b.
|
2864
3072
|
* \internal
|
2865
3073
|
* \exception-guarantee no-throw guarantee
|
2866
3074
|
* \behavior reentrant, thread safe
|
@@ -2873,11 +3081,11 @@ __CUDA_FP16_DECL__ bool __hge(const __half a, const __half b);
|
|
2873
3081
|
*
|
2874
3082
|
* \details Performs \p half less-than comparison of inputs \p a and \p b.
|
2875
3083
|
* NaN inputs generate false results.
|
2876
|
-
* \param[in] a - half. Is only being read.
|
2877
|
-
* \param[in] b - half. Is only being read.
|
3084
|
+
* \param[in] a - half. Is only being read.
|
3085
|
+
* \param[in] b - half. Is only being read.
|
2878
3086
|
*
|
2879
3087
|
* \returns bool
|
2880
|
-
*
|
3088
|
+
* - The boolean result of less-than comparison of \p a and \p b.
|
2881
3089
|
* \internal
|
2882
3090
|
* \exception-guarantee no-throw guarantee
|
2883
3091
|
* \behavior reentrant, thread safe
|
@@ -2890,11 +3098,11 @@ __CUDA_FP16_DECL__ bool __hlt(const __half a, const __half b);
|
|
2890
3098
|
*
|
2891
3099
|
* \details Performs \p half greater-than comparison of inputs \p a and \p b.
|
2892
3100
|
* NaN inputs generate false results.
|
2893
|
-
* \param[in] a - half. Is only being read.
|
2894
|
-
* \param[in] b - half. Is only being read.
|
3101
|
+
* \param[in] a - half. Is only being read.
|
3102
|
+
* \param[in] b - half. Is only being read.
|
2895
3103
|
*
|
2896
3104
|
* \returns bool
|
2897
|
-
*
|
3105
|
+
* - The boolean result of greater-than comparison of \p a and \p b.
|
2898
3106
|
* \internal
|
2899
3107
|
* \exception-guarantee no-throw guarantee
|
2900
3108
|
* \behavior reentrant, thread safe
|
@@ -2907,11 +3115,11 @@ __CUDA_FP16_DECL__ bool __hgt(const __half a, const __half b);
|
|
2907
3115
|
*
|
2908
3116
|
* \details Performs \p half if-equal comparison of inputs \p a and \p b.
|
2909
3117
|
* NaN inputs generate true results.
|
2910
|
-
* \param[in] a - half. Is only being read.
|
2911
|
-
* \param[in] b - half. Is only being read.
|
3118
|
+
* \param[in] a - half. Is only being read.
|
3119
|
+
* \param[in] b - half. Is only being read.
|
2912
3120
|
*
|
2913
3121
|
* \returns bool
|
2914
|
-
*
|
3122
|
+
* - The boolean result of unordered if-equal comparison of \p a and
|
2915
3123
|
* \p b.
|
2916
3124
|
* \internal
|
2917
3125
|
* \exception-guarantee no-throw guarantee
|
@@ -2925,11 +3133,11 @@ __CUDA_FP16_DECL__ bool __hequ(const __half a, const __half b);
|
|
2925
3133
|
*
|
2926
3134
|
* \details Performs \p half not-equal comparison of inputs \p a and \p b.
|
2927
3135
|
* NaN inputs generate true results.
|
2928
|
-
* \param[in] a - half. Is only being read.
|
2929
|
-
* \param[in] b - half. Is only being read.
|
3136
|
+
* \param[in] a - half. Is only being read.
|
3137
|
+
* \param[in] b - half. Is only being read.
|
2930
3138
|
*
|
2931
3139
|
* \returns bool
|
2932
|
-
*
|
3140
|
+
* - The boolean result of unordered not-equal comparison of \p a and
|
2933
3141
|
* \p b.
|
2934
3142
|
* \internal
|
2935
3143
|
* \exception-guarantee no-throw guarantee
|
@@ -2943,11 +3151,11 @@ __CUDA_FP16_DECL__ bool __hneu(const __half a, const __half b);
|
|
2943
3151
|
*
|
2944
3152
|
* \details Performs \p half less-equal comparison of inputs \p a and \p b.
|
2945
3153
|
* NaN inputs generate true results.
|
2946
|
-
* \param[in] a - half. Is only being read.
|
2947
|
-
* \param[in] b - half. Is only being read.
|
3154
|
+
* \param[in] a - half. Is only being read.
|
3155
|
+
* \param[in] b - half. Is only being read.
|
2948
3156
|
*
|
2949
3157
|
* \returns bool
|
2950
|
-
*
|
3158
|
+
* - The boolean result of unordered less-equal comparison of \p a and
|
2951
3159
|
* \p b.
|
2952
3160
|
* \internal
|
2953
3161
|
* \exception-guarantee no-throw guarantee
|
@@ -2961,11 +3169,11 @@ __CUDA_FP16_DECL__ bool __hleu(const __half a, const __half b);
|
|
2961
3169
|
*
|
2962
3170
|
* \details Performs \p half greater-equal comparison of inputs \p a and \p b.
|
2963
3171
|
* NaN inputs generate true results.
|
2964
|
-
* \param[in] a - half. Is only being read.
|
2965
|
-
* \param[in] b - half. Is only being read.
|
3172
|
+
* \param[in] a - half. Is only being read.
|
3173
|
+
* \param[in] b - half. Is only being read.
|
2966
3174
|
*
|
2967
3175
|
* \returns bool
|
2968
|
-
*
|
3176
|
+
* - The boolean result of unordered greater-equal comparison of \p a
|
2969
3177
|
* and \p b.
|
2970
3178
|
* \internal
|
2971
3179
|
* \exception-guarantee no-throw guarantee
|
@@ -2979,11 +3187,11 @@ __CUDA_FP16_DECL__ bool __hgeu(const __half a, const __half b);
|
|
2979
3187
|
*
|
2980
3188
|
* \details Performs \p half less-than comparison of inputs \p a and \p b.
|
2981
3189
|
* NaN inputs generate true results.
|
2982
|
-
* \param[in] a - half. Is only being read.
|
2983
|
-
* \param[in] b - half. Is only being read.
|
3190
|
+
* \param[in] a - half. Is only being read.
|
3191
|
+
* \param[in] b - half. Is only being read.
|
2984
3192
|
*
|
2985
3193
|
* \returns bool
|
2986
|
-
*
|
3194
|
+
* - The boolean result of unordered less-than comparison of \p a and
|
2987
3195
|
* \p b.
|
2988
3196
|
* \internal
|
2989
3197
|
* \exception-guarantee no-throw guarantee
|
@@ -2997,11 +3205,11 @@ __CUDA_FP16_DECL__ bool __hltu(const __half a, const __half b);
|
|
2997
3205
|
*
|
2998
3206
|
* \details Performs \p half greater-than comparison of inputs \p a and \p b.
|
2999
3207
|
* NaN inputs generate true results.
|
3000
|
-
* \param[in] a - half. Is only being read.
|
3001
|
-
* \param[in] b - half. Is only being read.
|
3208
|
+
* \param[in] a - half. Is only being read.
|
3209
|
+
* \param[in] b - half. Is only being read.
|
3002
3210
|
*
|
3003
3211
|
* \returns bool
|
3004
|
-
*
|
3212
|
+
* - The boolean result of unordered greater-than comparison of \p a
|
3005
3213
|
* and \p b.
|
3006
3214
|
* \internal
|
3007
3215
|
* \exception-guarantee no-throw guarantee
|
@@ -3014,55 +3222,17 @@ __CUDA_FP16_DECL__ bool __hgtu(const __half a, const __half b);
|
|
3014
3222
|
* \brief Determine whether \p half argument is a NaN.
|
3015
3223
|
*
|
3016
3224
|
* \details Determine whether \p half value \p a is a NaN.
|
3017
|
-
* \param[in] a - half. Is only being read.
|
3225
|
+
* \param[in] a - half. Is only being read.
|
3018
3226
|
*
|
3019
3227
|
* \returns bool
|
3020
|
-
*
|
3228
|
+
* - true iff argument is NaN.
|
3021
3229
|
* \internal
|
3022
3230
|
* \exception-guarantee no-throw guarantee
|
3023
3231
|
* \behavior reentrant, thread safe
|
3024
3232
|
* \endinternal
|
3025
3233
|
*/
|
3026
3234
|
__CUDA_FP16_DECL__ bool __hisnan(const __half a);
|
3027
|
-
#if __CUDA_ARCH__
|
3028
|
-
/**
|
3029
|
-
* \ingroup CUDA_MATH__HALF_COMPARISON
|
3030
|
-
* \brief Calculates \p half maximum of two input values.
|
3031
|
-
*
|
3032
|
-
* \details Calculates \p half max(\p a, \p b)
|
3033
|
-
* defined as (\p a > \p b) ? \p a : \p b.
|
3034
|
-
* - If either of inputs is NaN, the other input is returned.
|
3035
|
-
* - If both inputs are NaNs, then canonical NaN is returned.
|
3036
|
-
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
3037
|
-
* \param[in] a - half. Is only being read.
|
3038
|
-
* \param[in] b - half. Is only being read.
|
3039
|
-
*
|
3040
|
-
* \returns half
|
3041
|
-
* \internal
|
3042
|
-
* \exception-guarantee no-throw guarantee
|
3043
|
-
* \behavior reentrant, thread safe
|
3044
|
-
* \endinternal
|
3045
|
-
*/
|
3046
|
-
__CUDA_FP16_DECL__ __half __hmax(const __half a, const __half b);
|
3047
|
-
/**
|
3048
|
-
* \ingroup CUDA_MATH__HALF_COMPARISON
|
3049
|
-
* \brief Calculates \p half minimum of two input values.
|
3050
|
-
*
|
3051
|
-
* \details Calculates \p half min(\p a, \p b)
|
3052
|
-
* defined as (\p a < \p b) ? \p a : \p b.
|
3053
|
-
* - If either of inputs is NaN, the other input is returned.
|
3054
|
-
* - If both inputs are NaNs, then canonical NaN is returned.
|
3055
|
-
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
3056
|
-
* \param[in] a - half. Is only being read.
|
3057
|
-
* \param[in] b - half. Is only being read.
|
3058
|
-
*
|
3059
|
-
* \returns half
|
3060
|
-
* \internal
|
3061
|
-
* \exception-guarantee no-throw guarantee
|
3062
|
-
* \behavior reentrant, thread safe
|
3063
|
-
* \endinternal
|
3064
|
-
*/
|
3065
|
-
__CUDA_FP16_DECL__ __half __hmin(const __half a, const __half b);
|
3235
|
+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 800)
|
3066
3236
|
/**
|
3067
3237
|
* \ingroup CUDA_MATH__HALF_COMPARISON
|
3068
3238
|
* \brief Calculates \p half maximum of two input values, NaNs pass through.
|
@@ -3113,7 +3283,7 @@ __CUDA_FP16_DECL__ __half __hmin_nan(const __half a, const __half b);
|
|
3113
3283
|
* \param[in] c - half. Is only being read.
|
3114
3284
|
*
|
3115
3285
|
* \returns half
|
3116
|
-
*
|
3286
|
+
* - The result of fused multiply-add operation on \p
|
3117
3287
|
* a, \p b, and \p c with relu saturation.
|
3118
3288
|
* \internal
|
3119
3289
|
* \exception-guarantee no-throw guarantee
|
@@ -3123,48 +3293,6 @@ __CUDA_FP16_DECL__ __half __hmin_nan(const __half a, const __half b);
|
|
3123
3293
|
__CUDA_FP16_DECL__ __half __hfma_relu(const __half a, const __half b, const __half c);
|
3124
3294
|
/**
|
3125
3295
|
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
3126
|
-
* \brief Calculates \p half2 vector maximum of two inputs.
|
3127
|
-
*
|
3128
|
-
* \details Calculates \p half2 vector max(\p a, \p b).
|
3129
|
-
* Elementwise \p half operation is defined as
|
3130
|
-
* (\p a > \p b) ? \p a : \p b.
|
3131
|
-
* - If either of inputs is NaN, the other input is returned.
|
3132
|
-
* - If both inputs are NaNs, then canonical NaN is returned.
|
3133
|
-
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
3134
|
-
* \param[in] a - half2. Is only being read.
|
3135
|
-
* \param[in] b - half2. Is only being read.
|
3136
|
-
*
|
3137
|
-
* \returns half2
|
3138
|
-
* \retval The result of elementwise maximum of vectors \p a and \p b
|
3139
|
-
* \internal
|
3140
|
-
* \exception-guarantee no-throw guarantee
|
3141
|
-
* \behavior reentrant, thread safe
|
3142
|
-
* \endinternal
|
3143
|
-
*/
|
3144
|
-
__CUDA_FP16_DECL__ __half2 __hmax2(const __half2 a, const __half2 b);
|
3145
|
-
/**
|
3146
|
-
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
3147
|
-
* \brief Calculates \p half2 vector minimum of two inputs.
|
3148
|
-
*
|
3149
|
-
* \details Calculates \p half2 vector min(\p a, \p b).
|
3150
|
-
* Elementwise \p half operation is defined as
|
3151
|
-
* (\p a < \p b) ? \p a : \p b.
|
3152
|
-
* - If either of inputs is NaN, the other input is returned.
|
3153
|
-
* - If both inputs are NaNs, then canonical NaN is returned.
|
3154
|
-
* - If values of both inputs are 0.0, then +0.0 > -0.0
|
3155
|
-
* \param[in] a - half2. Is only being read.
|
3156
|
-
* \param[in] b - half2. Is only being read.
|
3157
|
-
*
|
3158
|
-
* \returns half2
|
3159
|
-
* \retval The result of elementwise minimum of vectors \p a and \p b
|
3160
|
-
* \internal
|
3161
|
-
* \exception-guarantee no-throw guarantee
|
3162
|
-
* \behavior reentrant, thread safe
|
3163
|
-
* \endinternal
|
3164
|
-
*/
|
3165
|
-
__CUDA_FP16_DECL__ __half2 __hmin2(const __half2 a, const __half2 b);
|
3166
|
-
/**
|
3167
|
-
* \ingroup CUDA_MATH__HALF2_COMPARISON
|
3168
3296
|
* \brief Calculates \p half2 vector maximum of two inputs, NaNs pass through.
|
3169
3297
|
*
|
3170
3298
|
* \details Calculates \p half2 vector max(\p a, \p b).
|
@@ -3176,7 +3304,7 @@ __CUDA_FP16_DECL__ __half2 __hmin2(const __half2 a, const __half2 b);
|
|
3176
3304
|
* \param[in] b - half2. Is only being read.
|
3177
3305
|
*
|
3178
3306
|
* \returns half2
|
3179
|
-
*
|
3307
|
+
* - The result of elementwise maximum of vectors \p a and \p b, with NaNs pass through
|
3180
3308
|
* \internal
|
3181
3309
|
* \exception-guarantee no-throw guarantee
|
3182
3310
|
* \behavior reentrant, thread safe
|
@@ -3196,7 +3324,7 @@ __CUDA_FP16_DECL__ __half2 __hmax2_nan(const __half2 a, const __half2 b);
|
|
3196
3324
|
* \param[in] b - half2. Is only being read.
|
3197
3325
|
*
|
3198
3326
|
* \returns half2
|
3199
|
-
*
|
3327
|
+
* - The result of elementwise minimum of vectors \p a and \p b, with NaNs pass through
|
3200
3328
|
* \internal
|
3201
3329
|
* \exception-guarantee no-throw guarantee
|
3202
3330
|
* \behavior reentrant, thread safe
|
@@ -3218,14 +3346,14 @@ __CUDA_FP16_DECL__ __half2 __hmin2_nan(const __half2 a, const __half2 b);
|
|
3218
3346
|
* \param[in] c - half2. Is only being read.
|
3219
3347
|
*
|
3220
3348
|
* \returns half2
|
3221
|
-
*
|
3349
|
+
* - The result of elementwise fused multiply-add operation on vectors \p a, \p b, and \p c with relu saturation.
|
3222
3350
|
* \internal
|
3223
3351
|
* \exception-guarantee no-throw guarantee
|
3224
3352
|
* \behavior reentrant, thread safe
|
3225
3353
|
* \endinternal
|
3226
3354
|
*/
|
3227
3355
|
__CUDA_FP16_DECL__ __half2 __hfma2_relu(const __half2 a, const __half2 b, const __half2 c);
|
3228
|
-
#endif /*__CUDA_ARCH__
|
3356
|
+
#endif /* !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 800) */
|
3229
3357
|
/**
|
3230
3358
|
* \ingroup CUDA_MATH__HALF2_ARITHMETIC
|
3231
3359
|
* \brief Performs fast complex multiply-accumulate
|
@@ -3238,7 +3366,7 @@ __CUDA_FP16_DECL__ __half2 __hfma2_relu(const __half2 a, const __half2 b, const
|
|
3238
3366
|
* \param[in] c - half2. Is only being read.
|
3239
3367
|
*
|
3240
3368
|
* \returns half2
|
3241
|
-
*
|
3369
|
+
* - The result of complex multiply-accumulate operation on complex numbers \p a, \p b, and \p c
|
3242
3370
|
* \internal
|
3243
3371
|
* \exception-guarantee no-throw guarantee
|
3244
3372
|
* \behavior reentrant, thread safe
|
@@ -3250,10 +3378,10 @@ __CUDA_FP16_DECL__ __half2 __hcmadd(const __half2 a, const __half2 b, const __ha
|
|
3250
3378
|
* \brief Calculates \p half square root in round-to-nearest-even mode.
|
3251
3379
|
*
|
3252
3380
|
* \details Calculates \p half square root of input \p a in round-to-nearest-even mode.
|
3253
|
-
* \param[in] a - half. Is only being read.
|
3381
|
+
* \param[in] a - half. Is only being read.
|
3254
3382
|
*
|
3255
3383
|
* \returns half
|
3256
|
-
*
|
3384
|
+
* - The square root of \p a.
|
3257
3385
|
* \internal
|
3258
3386
|
* \exception-guarantee no-throw guarantee
|
3259
3387
|
* \behavior reentrant, thread safe
|
@@ -3267,10 +3395,10 @@ __CUDA_FP16_DECL__ __half hsqrt(const __half a);
|
|
3267
3395
|
*
|
3268
3396
|
* \details Calculates \p half reciprocal square root of input \p a in round-to-nearest
|
3269
3397
|
* mode.
|
3270
|
-
* \param[in] a - half. Is only being read.
|
3398
|
+
* \param[in] a - half. Is only being read.
|
3271
3399
|
*
|
3272
3400
|
* \returns half
|
3273
|
-
*
|
3401
|
+
* - The reciprocal square root of \p a.
|
3274
3402
|
* \internal
|
3275
3403
|
* \exception-guarantee no-throw guarantee
|
3276
3404
|
* \behavior reentrant, thread safe
|
@@ -3282,10 +3410,10 @@ __CUDA_FP16_DECL__ __half hrsqrt(const __half a);
|
|
3282
3410
|
* \brief Calculates \p half reciprocal in round-to-nearest-even mode.
|
3283
3411
|
*
|
3284
3412
|
* \details Calculates \p half reciprocal of input \p a in round-to-nearest-even mode.
|
3285
|
-
* \param[in] a - half. Is only being read.
|
3413
|
+
* \param[in] a - half. Is only being read.
|
3286
3414
|
*
|
3287
3415
|
* \returns half
|
3288
|
-
*
|
3416
|
+
* - The reciprocal of \p a.
|
3289
3417
|
* \internal
|
3290
3418
|
* \exception-guarantee no-throw guarantee
|
3291
3419
|
* \behavior reentrant, thread safe
|
@@ -3298,10 +3426,10 @@ __CUDA_FP16_DECL__ __half hrcp(const __half a);
|
|
3298
3426
|
*
|
3299
3427
|
* \details Calculates \p half natural logarithm of input \p a in round-to-nearest-even
|
3300
3428
|
* mode.
|
3301
|
-
* \param[in] a - half. Is only being read.
|
3429
|
+
* \param[in] a - half. Is only being read.
|
3302
3430
|
*
|
3303
3431
|
* \returns half
|
3304
|
-
*
|
3432
|
+
* - The natural logarithm of \p a.
|
3305
3433
|
* \internal
|
3306
3434
|
* \exception-guarantee no-throw guarantee
|
3307
3435
|
* \behavior reentrant, thread safe
|
@@ -3314,10 +3442,10 @@ __CUDA_FP16_DECL__ __half hlog(const __half a);
|
|
3314
3442
|
*
|
3315
3443
|
* \details Calculates \p half binary logarithm of input \p a in round-to-nearest-even
|
3316
3444
|
* mode.
|
3317
|
-
* \param[in] a - half. Is only being read.
|
3445
|
+
* \param[in] a - half. Is only being read.
|
3318
3446
|
*
|
3319
3447
|
* \returns half
|
3320
|
-
*
|
3448
|
+
* - The binary logarithm of \p a.
|
3321
3449
|
* \internal
|
3322
3450
|
* \exception-guarantee no-throw guarantee
|
3323
3451
|
* \behavior reentrant, thread safe
|
@@ -3330,10 +3458,10 @@ __CUDA_FP16_DECL__ __half hlog2(const __half a);
|
|
3330
3458
|
*
|
3331
3459
|
* \details Calculates \p half decimal logarithm of input \p a in round-to-nearest-even
|
3332
3460
|
* mode.
|
3333
|
-
* \param[in] a - half. Is only being read.
|
3461
|
+
* \param[in] a - half. Is only being read.
|
3334
3462
|
*
|
3335
3463
|
* \returns half
|
3336
|
-
*
|
3464
|
+
* - The decimal logarithm of \p a.
|
3337
3465
|
* \internal
|
3338
3466
|
* \exception-guarantee no-throw guarantee
|
3339
3467
|
* \behavior reentrant, thread safe
|
@@ -3347,10 +3475,10 @@ __CUDA_FP16_DECL__ __half hlog10(const __half a);
|
|
3347
3475
|
*
|
3348
3476
|
* \details Calculates \p half natural exponential function of input \p a in
|
3349
3477
|
* round-to-nearest-even mode.
|
3350
|
-
* \param[in] a - half. Is only being read.
|
3478
|
+
* \param[in] a - half. Is only being read.
|
3351
3479
|
*
|
3352
3480
|
* \returns half
|
3353
|
-
*
|
3481
|
+
* - The natural exponential function on \p a.
|
3354
3482
|
* \internal
|
3355
3483
|
* \exception-guarantee no-throw guarantee
|
3356
3484
|
* \behavior reentrant, thread safe
|
@@ -3364,10 +3492,10 @@ __CUDA_FP16_DECL__ __half hexp(const __half a);
|
|
3364
3492
|
*
|
3365
3493
|
* \details Calculates \p half binary exponential function of input \p a in
|
3366
3494
|
* round-to-nearest-even mode.
|
3367
|
-
* \param[in] a - half. Is only being read.
|
3495
|
+
* \param[in] a - half. Is only being read.
|
3368
3496
|
*
|
3369
3497
|
* \returns half
|
3370
|
-
*
|
3498
|
+
* - The binary exponential function on \p a.
|
3371
3499
|
* \internal
|
3372
3500
|
* \exception-guarantee no-throw guarantee
|
3373
3501
|
* \behavior reentrant, thread safe
|
@@ -3381,10 +3509,10 @@ __CUDA_FP16_DECL__ __half hexp2(const __half a);
|
|
3381
3509
|
*
|
3382
3510
|
* \details Calculates \p half decimal exponential function of input \p a in
|
3383
3511
|
* round-to-nearest-even mode.
|
3384
|
-
* \param[in] a - half. Is only being read.
|
3512
|
+
* \param[in] a - half. Is only being read.
|
3385
3513
|
*
|
3386
3514
|
* \returns half
|
3387
|
-
*
|
3515
|
+
* - The decimal exponential function on \p a.
|
3388
3516
|
* \internal
|
3389
3517
|
* \exception-guarantee no-throw guarantee
|
3390
3518
|
* \behavior reentrant, thread safe
|
@@ -3396,10 +3524,10 @@ __CUDA_FP16_DECL__ __half hexp10(const __half a);
|
|
3396
3524
|
* \brief Calculates \p half cosine in round-to-nearest-even mode.
|
3397
3525
|
*
|
3398
3526
|
* \details Calculates \p half cosine of input \p a in round-to-nearest-even mode.
|
3399
|
-
* \param[in] a - half. Is only being read.
|
3527
|
+
* \param[in] a - half. Is only being read.
|
3400
3528
|
*
|
3401
3529
|
* \returns half
|
3402
|
-
*
|
3530
|
+
* - The cosine of \p a.
|
3403
3531
|
* \internal
|
3404
3532
|
* \exception-guarantee no-throw guarantee
|
3405
3533
|
* \behavior reentrant, thread safe
|
@@ -3411,10 +3539,10 @@ __CUDA_FP16_DECL__ __half hcos(const __half a);
|
|
3411
3539
|
* \brief Calculates \p half sine in round-to-nearest-even mode.
|
3412
3540
|
*
|
3413
3541
|
* \details Calculates \p half sine of input \p a in round-to-nearest-even mode.
|
3414
|
-
* \param[in] a - half. Is only being read.
|
3542
|
+
* \param[in] a - half. Is only being read.
|
3415
3543
|
*
|
3416
3544
|
* \returns half
|
3417
|
-
*
|
3545
|
+
* - The sine of \p a.
|
3418
3546
|
* \internal
|
3419
3547
|
* \exception-guarantee no-throw guarantee
|
3420
3548
|
* \behavior reentrant, thread safe
|
@@ -3427,10 +3555,10 @@ __CUDA_FP16_DECL__ __half hsin(const __half a);
|
|
3427
3555
|
*
|
3428
3556
|
* \details Calculates \p half2 square root of input vector \p a in round-to-nearest
|
3429
3557
|
* mode.
|
3430
|
-
* \param[in] a - half2. Is only being read.
|
3558
|
+
* \param[in] a - half2. Is only being read.
|
3431
3559
|
*
|
3432
3560
|
* \returns half2
|
3433
|
-
*
|
3561
|
+
* - The elementwise square root on vector \p a.
|
3434
3562
|
* \internal
|
3435
3563
|
* \exception-guarantee no-throw guarantee
|
3436
3564
|
* \behavior reentrant, thread safe
|
@@ -3444,10 +3572,10 @@ __CUDA_FP16_DECL__ __half2 h2sqrt(const __half2 a);
|
|
3444
3572
|
*
|
3445
3573
|
* \details Calculates \p half2 reciprocal square root of input vector \p a in
|
3446
3574
|
* round-to-nearest-even mode.
|
3447
|
-
* \param[in] a - half2. Is only being read.
|
3575
|
+
* \param[in] a - half2. Is only being read.
|
3448
3576
|
*
|
3449
3577
|
* \returns half2
|
3450
|
-
*
|
3578
|
+
* - The elementwise reciprocal square root on vector \p a.
|
3451
3579
|
* \internal
|
3452
3580
|
* \exception-guarantee no-throw guarantee
|
3453
3581
|
* \behavior reentrant, thread safe
|
@@ -3460,10 +3588,10 @@ __CUDA_FP16_DECL__ __half2 h2rsqrt(const __half2 a);
|
|
3460
3588
|
*
|
3461
3589
|
* \details Calculates \p half2 reciprocal of input vector \p a in round-to-nearest-even
|
3462
3590
|
* mode.
|
3463
|
-
* \param[in] a - half2. Is only being read.
|
3591
|
+
* \param[in] a - half2. Is only being read.
|
3464
3592
|
*
|
3465
3593
|
* \returns half2
|
3466
|
-
*
|
3594
|
+
* - The elementwise reciprocal on vector \p a.
|
3467
3595
|
* \internal
|
3468
3596
|
* \exception-guarantee no-throw guarantee
|
3469
3597
|
* \behavior reentrant, thread safe
|
@@ -3477,10 +3605,10 @@ __CUDA_FP16_DECL__ __half2 h2rcp(const __half2 a);
|
|
3477
3605
|
*
|
3478
3606
|
* \details Calculates \p half2 natural logarithm of input vector \p a in
|
3479
3607
|
* round-to-nearest-even mode.
|
3480
|
-
* \param[in] a - half2. Is only being read.
|
3608
|
+
* \param[in] a - half2. Is only being read.
|
3481
3609
|
*
|
3482
3610
|
* \returns half2
|
3483
|
-
*
|
3611
|
+
* - The elementwise natural logarithm on vector \p a.
|
3484
3612
|
* \internal
|
3485
3613
|
* \exception-guarantee no-throw guarantee
|
3486
3614
|
* \behavior reentrant, thread safe
|
@@ -3494,10 +3622,10 @@ __CUDA_FP16_DECL__ __half2 h2log(const __half2 a);
|
|
3494
3622
|
*
|
3495
3623
|
* \details Calculates \p half2 binary logarithm of input vector \p a in round-to-nearest
|
3496
3624
|
* mode.
|
3497
|
-
* \param[in] a - half2. Is only being read.
|
3625
|
+
* \param[in] a - half2. Is only being read.
|
3498
3626
|
*
|
3499
3627
|
* \returns half2
|
3500
|
-
*
|
3628
|
+
* - The elementwise binary logarithm on vector \p a.
|
3501
3629
|
* \internal
|
3502
3630
|
* \exception-guarantee no-throw guarantee
|
3503
3631
|
* \behavior reentrant, thread safe
|
@@ -3511,10 +3639,10 @@ __CUDA_FP16_DECL__ __half2 h2log2(const __half2 a);
|
|
3511
3639
|
*
|
3512
3640
|
* \details Calculates \p half2 decimal logarithm of input vector \p a in
|
3513
3641
|
* round-to-nearest-even mode.
|
3514
|
-
* \param[in] a - half2. Is only being read.
|
3642
|
+
* \param[in] a - half2. Is only being read.
|
3515
3643
|
*
|
3516
3644
|
* \returns half2
|
3517
|
-
*
|
3645
|
+
* - The elementwise decimal logarithm on vector \p a.
|
3518
3646
|
* \internal
|
3519
3647
|
* \exception-guarantee no-throw guarantee
|
3520
3648
|
* \behavior reentrant, thread safe
|
@@ -3528,10 +3656,10 @@ __CUDA_FP16_DECL__ __half2 h2log10(const __half2 a);
|
|
3528
3656
|
*
|
3529
3657
|
* \details Calculates \p half2 exponential function of input vector \p a in
|
3530
3658
|
* round-to-nearest-even mode.
|
3531
|
-
* \param[in] a - half2. Is only being read.
|
3659
|
+
* \param[in] a - half2. Is only being read.
|
3532
3660
|
*
|
3533
3661
|
* \returns half2
|
3534
|
-
*
|
3662
|
+
* - The elementwise exponential function on vector \p a.
|
3535
3663
|
* \internal
|
3536
3664
|
* \exception-guarantee no-throw guarantee
|
3537
3665
|
* \behavior reentrant, thread safe
|
@@ -3545,10 +3673,10 @@ __CUDA_FP16_DECL__ __half2 h2exp(const __half2 a);
|
|
3545
3673
|
*
|
3546
3674
|
* \details Calculates \p half2 binary exponential function of input vector \p a in
|
3547
3675
|
* round-to-nearest-even mode.
|
3548
|
-
* \param[in] a - half2. Is only being read.
|
3676
|
+
* \param[in] a - half2. Is only being read.
|
3549
3677
|
*
|
3550
3678
|
* \returns half2
|
3551
|
-
*
|
3679
|
+
* - The elementwise binary exponential function on vector \p a.
|
3552
3680
|
* \internal
|
3553
3681
|
* \exception-guarantee no-throw guarantee
|
3554
3682
|
* \behavior reentrant, thread safe
|
@@ -3559,13 +3687,13 @@ __CUDA_FP16_DECL__ __half2 h2exp2(const __half2 a);
|
|
3559
3687
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
3560
3688
|
* \brief Calculates \p half2 vector decimal exponential function in
|
3561
3689
|
* round-to-nearest-even mode.
|
3562
|
-
*
|
3690
|
+
*
|
3563
3691
|
* \details Calculates \p half2 decimal exponential function of input vector \p a in
|
3564
3692
|
* round-to-nearest-even mode.
|
3565
|
-
* \param[in] a - half2. Is only being read.
|
3566
|
-
*
|
3693
|
+
* \param[in] a - half2. Is only being read.
|
3694
|
+
*
|
3567
3695
|
* \returns half2
|
3568
|
-
*
|
3696
|
+
* - The elementwise decimal exponential function on vector \p a.
|
3569
3697
|
* \internal
|
3570
3698
|
* \exception-guarantee no-throw guarantee
|
3571
3699
|
* \behavior reentrant, thread safe
|
@@ -3575,13 +3703,13 @@ __CUDA_FP16_DECL__ __half2 h2exp10(const __half2 a);
|
|
3575
3703
|
/**
|
3576
3704
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
3577
3705
|
* \brief Calculates \p half2 vector cosine in round-to-nearest-even mode.
|
3578
|
-
*
|
3706
|
+
*
|
3579
3707
|
* \details Calculates \p half2 cosine of input vector \p a in round-to-nearest-even
|
3580
3708
|
* mode.
|
3581
|
-
* \param[in] a - half2. Is only being read.
|
3582
|
-
*
|
3709
|
+
* \param[in] a - half2. Is only being read.
|
3710
|
+
*
|
3583
3711
|
* \returns half2
|
3584
|
-
*
|
3712
|
+
* - The elementwise cosine on vector \p a.
|
3585
3713
|
* \internal
|
3586
3714
|
* \exception-guarantee no-throw guarantee
|
3587
3715
|
* \behavior reentrant, thread safe
|
@@ -3591,12 +3719,12 @@ __CUDA_FP16_DECL__ __half2 h2cos(const __half2 a);
|
|
3591
3719
|
/**
|
3592
3720
|
* \ingroup CUDA_MATH__HALF2_FUNCTIONS
|
3593
3721
|
* \brief Calculates \p half2 vector sine in round-to-nearest-even mode.
|
3594
|
-
*
|
3722
|
+
*
|
3595
3723
|
* \details Calculates \p half2 sine of input vector \p a in round-to-nearest-even mode.
|
3596
|
-
* \param[in] a - half2. Is only being read.
|
3597
|
-
*
|
3724
|
+
* \param[in] a - half2. Is only being read.
|
3725
|
+
*
|
3598
3726
|
* \returns half2
|
3599
|
-
*
|
3727
|
+
* - The elementwise sine on vector \p a.
|
3600
3728
|
* \internal
|
3601
3729
|
* \exception-guarantee no-throw guarantee
|
3602
3730
|
* \behavior reentrant, thread safe
|
@@ -3604,19 +3732,52 @@ __CUDA_FP16_DECL__ __half2 h2cos(const __half2 a);
|
|
3604
3732
|
*/
|
3605
3733
|
__CUDA_FP16_DECL__ __half2 h2sin(const __half2 a);
|
3606
3734
|
|
3607
|
-
#endif /*if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)*/
|
3735
|
+
#endif /*if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)*/
|
3608
3736
|
|
3609
|
-
#if __CUDA_ARCH__ >= 600 || !defined(__CUDA_ARCH__)
|
3737
|
+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
|
3610
3738
|
|
3739
|
+
/**
|
3740
|
+
* \ingroup CUDA_MATH__HALF2_ARITHMETIC
|
3741
|
+
* \brief Vector add \p val to the value stored at \p address in global or shared memory, and writes this
|
3742
|
+
* value back to \p address. The atomicity of the add operation is guaranteed separately for each of the
|
3743
|
+
* two __half elements; the entire __half2 is not guaranteed to be atomic as a single 32-bit access.
|
3744
|
+
*
|
3745
|
+
* \details The location of \p address must be in global or shared memory. This operation has undefined
|
3746
|
+
* behavior otherwise. This operation is only supported by devices of compute capability 6.x and higher.
|
3747
|
+
*
|
3748
|
+
* \param[in] address - half2*. An address in global or shared memory.
|
3749
|
+
* \param[in] val - half2. The value to be added.
|
3750
|
+
*
|
3751
|
+
* \returns half2
|
3752
|
+
* - The old value read from \p address.
|
3753
|
+
*
|
3754
|
+
* \note_ref_guide_atomic
|
3755
|
+
*/
|
3611
3756
|
__CUDA_FP16_DECL__ __half2 atomicAdd(__half2 *const address, const __half2 val);
|
3612
3757
|
|
3613
|
-
#endif /*if __CUDA_ARCH__ >= 600 || !defined(__CUDA_ARCH__)*/
|
3758
|
+
#endif /*if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)*/
|
3614
3759
|
|
3615
|
-
#if __CUDA_ARCH__ >= 700 || !defined(__CUDA_ARCH__)
|
3760
|
+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)
|
3616
3761
|
|
3762
|
+
/**
|
3763
|
+
* \ingroup CUDA_MATH__HALF_ARITHMETIC
|
3764
|
+
* \brief Adds \p val to the value stored at \p address in global or shared memory, and writes this value
|
3765
|
+
* back to \p address. This operation is performed in one atomic operation.
|
3766
|
+
*
|
3767
|
+
* \details The location of \p address must be in global or shared memory. This operation has undefined
|
3768
|
+
* behavior otherwise. This operation is only supported by devices of compute capability 7.x and higher.
|
3769
|
+
*
|
3770
|
+
* \param[in] address - half*. An address in global or shared memory.
|
3771
|
+
* \param[in] val - half. The value to be added.
|
3772
|
+
*
|
3773
|
+
* \returns half
|
3774
|
+
* - The old value read from \p address.
|
3775
|
+
*
|
3776
|
+
* \note_ref_guide_atomic
|
3777
|
+
*/
|
3617
3778
|
__CUDA_FP16_DECL__ __half atomicAdd(__half *const address, const __half val);
|
3618
3779
|
|
3619
|
-
#endif /*if __CUDA_ARCH__ >= 700 || !defined(__CUDA_ARCH__)*/
|
3780
|
+
#endif /*if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)*/
|
3620
3781
|
|
3621
3782
|
#endif /* defined(__CUDACC__) */
|
3622
3783
|
|
@@ -3627,5 +3788,7 @@ __CUDA_FP16_DECL__ __half atomicAdd(__half *const address, const __half val);
|
|
3627
3788
|
|
3628
3789
|
/* Note the .hpp file is included even for host-side compilation, to capture the "half" & "half2" definitions */
|
3629
3790
|
#include "cuda_fp16.hpp"
|
3791
|
+
#undef ___CUDA_FP16_STRINGIFY_INNERMOST
|
3792
|
+
#undef __CUDA_FP16_STRINGIFY
|
3630
3793
|
|
3631
3794
|
#endif /* end of include guard: __CUDA_FP16_H__ */
|