libbin 0.9.0 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c632bd10fef318615e63edccbebd3b161ce7c9e3ccb5a2efec3b43bda3565cde
4
- data.tar.gz: cd8853a5ca548a3221710d6388ecbd54f46cf9e3721410af7a1f581d115c738d
3
+ metadata.gz: 230be2238c11ec4505d14cea8a43181b6b006cd3db0eccc4d40d1fb117912681
4
+ data.tar.gz: c21f1a9ab288be527d14eeea80f248f9332d98d3dc8351dd811536aaa24850f0
5
5
  SHA512:
6
- metadata.gz: bf807641924427087c89d3f3d89fc91c41a91cf2a6729bba0185592752d071fca322c3655b90912c288c7b3ce98a1a46db6d5471e5e4fd403f7a8570980547e9
7
- data.tar.gz: acd847eb8f52ff55c7c1c92fe86fa6bf11cd20845189f6cd13957333b32feeda29bc620d9e8152d835aba2ba9ff45313357f7f75d54a7de126eb7c3d781ab9e9
6
+ metadata.gz: a8fbca741eb24035f80751e3eaf49a3b1b89ac1dbb8f4ae9cec3b58c77ddaf2e8b93c74af781cde5756e07d4a4fd115d07e703a8b5f8ba537da104c82f4a845d
7
+ data.tar.gz: 307b2a33d3d6d3357bc8c59079dfa0bbdb62a9e189894abde7d9b32bcf180c42b5614dcdb4ccdcebdc593c40e32afe59c3ad018db50a3ecff8568c833034778e
data/LICENSE CHANGED
File without changes
@@ -0,0 +1,3 @@
1
# Build script for the native extension: loads Ruby's mkmf helper and asks it
# to write a Makefile for the extension named "libbin_c".
+ require 'mkmf'
2
+
3
+ create_makefile("libbin_c")
data/ext/libbin/half.c ADDED
@@ -0,0 +1,719 @@
1
+ // Branch-free implementation of half-precision (16 bit) floating point
2
+ // Copyright 2006 Mike Acton <macton@gmail.com>
3
+ //
4
+ // Permission is hereby granted, free of charge, to any person obtaining a
5
+ // copy of this software and associated documentation files (the "Software"),
6
+ // to deal in the Software without restriction, including without limitation
7
+ // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
+ // and/or sell copies of the Software, and to permit persons to whom the
9
+ // Software is furnished to do so, subject to the following conditions:
10
+ //
11
+ // The above copyright notice and this permission notice shall be included
12
+ // in all copies or substantial portions of the Software.
13
+ //
14
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ // THE SOFTWARE
21
+ //
22
+ // Half-precision floating point format
23
+ // ------------------------------------
24
+ //
25
+ // | Field | Last | First | Note
26
+ // |----------|------|-------|----------
27
+ // | Sign | 15 | 15 |
28
+ // | Exponent | 14 | 10 | Bias = 15
29
+ // | Mantissa | 9 | 0 |
30
+ //
31
+ // Compiling
32
+ // ---------
33
+ //
34
+ // Preferred compile flags for GCC:
35
+ // -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
36
+ //
37
+ // This file is a C99 source file, intended to be compiled with a C99
38
+ // compliant compiler. However, for the moment it remains compatible
39
+ // with C++98. Therefore if you are using a compiler that poorly implements
40
+ // C standards (e.g. MSVC), it may be compiled as C++. This is not
41
+ // guaranteed for future versions.
42
+ //
43
+
44
+ #include "half.h"
45
+
46
// Load immediate (32-bit): returns its argument unchanged.
static inline uint32_t _uint32_li( uint32_t a )
{
  const uint32_t immediate = a;

  return immediate;
}
51
+
52
// Decrement (32-bit): a - 1, wrapping from 0 to 0xffffffff.
static inline uint32_t _uint32_dec( uint32_t a )
{
  const uint32_t predecessor = a - 1u;

  return predecessor;
}
57
+
58
// Increment (32-bit): a + 1, wrapping from 0xffffffff to 0.
static inline uint32_t _uint32_inc( uint32_t a )
{
  const uint32_t successor = a + 1u;

  return successor;
}
63
+
64
// Complement (32-bit): flips every bit of a.
static inline uint32_t _uint32_not( uint32_t a )
{
  const uint32_t flipped = a ^ UINT32_MAX;

  return flipped;
}
69
+
70
// Negate (32-bit): two's-complement negation, i.e. (0 - a) modulo 2^32.
static inline uint32_t _uint32_neg( uint32_t a )
{
  const uint32_t negated = (uint32_t)0 - a;

  return negated;
}
75
+
76
// Extend sign (32-bit): replicates bit 31 of a into every bit.
//
// Returns 0xffffffff when bit 31 of a is set and 0 otherwise.
//
// Computed with unsigned arithmetic only: converting an out-of-range value to
// int32_t and right-shifting a negative signed value are both
// implementation-defined in C, whereas (0 - msb) is fully specified.
static inline uint32_t _uint32_ext( uint32_t a )
{
  const uint32_t msb = a >> 31;   // 0 or 1

  return ((uint32_t)0 - msb);
}
81
+
82
// And (32-bit): bits set in both a and b.
static inline uint32_t _uint32_and( uint32_t a, uint32_t b )
{
  const uint32_t common = b & a;

  return common;
}
87
+
88
// Exclusive Or (32-bit): bits set in exactly one of a and b.
static inline uint32_t _uint32_xor( uint32_t a, uint32_t b )
{
  const uint32_t differing = b ^ a;

  return differing;
}
93
+
94
// And with Complement (32-bit): bits of a that are NOT set in b.
static inline uint32_t _uint32_andc( uint32_t a, uint32_t b )
{
  const uint32_t b_cleared = ~b;
  const uint32_t kept      = a & b_cleared;

  return kept;
}
99
+
100
// Or (32-bit): bits set in a, in b, or in both.
static inline uint32_t _uint32_or( uint32_t a, uint32_t b )
{
  const uint32_t merged = b | a;

  return merged;
}
105
+
106
// Shift Right Logical (32-bit).
//
//   a   value to shift
//   sa  shift count
//
// Returns a >> sa for 0 <= sa <= 31, and 0 for every other count.
//
// In C, shifting a 32-bit value by a negative count or by 32 or more is
// undefined behaviour. Callers in this file compute every candidate result
// unconditionally and then select one, so they do pass such counts (for
// example the denormal shift in half_from_float can reach 113, and wraps to a
// negative count on the non-denormal path). The count is therefore masked
// before the shift, and the result is forced to 0 when the count was out of
// range, which is the mathematically expected value of an over-long logical
// right shift. The comparison yields 0 or 1, so no branch is required.
static inline uint32_t _uint32_srl( uint32_t a, int sa )
{
  const unsigned int count    = (unsigned int)sa;
  const uint32_t     in_range = (uint32_t)0 - (uint32_t)( count < 32u );
  const uint32_t     shifted  = a >> (count & 31u);

  return (shifted & in_range);
}
111
+
112
// Shift Left Logical (32-bit): a moved left by sa bit positions; bits shifted
// past bit 31 are discarded.
static inline uint32_t _uint32_sll( uint32_t a, int sa )
{
  const uint32_t shifted = a << sa;

  return shifted;
}
117
+
118
// Add (32-bit): a + b modulo 2^32.
static inline uint32_t _uint32_add( uint32_t a, uint32_t b )
{
  const uint32_t sum = b + a;

  return sum;
}
123
+
124
// Subtract (32-bit): a - b modulo 2^32.
static inline uint32_t _uint32_sub( uint32_t a, uint32_t b )
{
  const uint32_t difference = a - b;

  return difference;
}
129
+
130
// Multiply (32-bit): low 32 bits of the product a * b.
static inline uint32_t _uint32_mul( uint32_t a, uint32_t b )
{
  const uint32_t product = b * a;

  return product;
}
135
+
136
// Select on Sign bit (32-bit): returns a when bit 31 of test is set,
// otherwise b. The sign bit is smeared into a full-width mask and used to
// blend the two candidates, so no branch is taken.
static inline uint32_t _uint32_sels( uint32_t test, uint32_t a, uint32_t b )
{
  const uint32_t pick_a = (uint32_t)( ((int32_t)test) >> 31 );

  // Start from b and flip exactly the masked bits that differ from a.
  return (b ^ ((a ^ b) & pick_a));
}
146
+
147
// Select Bits on mask (32-bit): each result bit comes from a where the
// corresponding mask bit is 1 and from b where it is 0.
static inline uint32_t _uint32_selb( uint32_t mask, uint32_t a, uint32_t b )
{
  const uint32_t differing = a ^ b;

  // Start from b and flip exactly the masked bits that differ from a.
  return (b ^ (differing & mask));
}
156
+
157
// Load immediate (16-bit): returns its argument unchanged.
static inline uint16_t _uint16_li( uint16_t a )
{
  const uint16_t immediate = a;

  return immediate;
}
162
+
163
// Extend sign (16-bit): replicates bit 15 of a into every bit.
//
// Returns 0xffff when bit 15 of a is set and 0 otherwise.
//
// Computed with unsigned arithmetic only: converting an out-of-range value to
// int16_t and right-shifting a negative signed value are both
// implementation-defined in C, whereas (0 - msb) truncated to 16 bits is
// fully specified.
static inline uint16_t _uint16_ext( uint16_t a )
{
  const unsigned int msb = (unsigned int)a >> 15;   // 0 or 1

  return (uint16_t)(0u - msb);
}
168
+
169
// Negate (16-bit): two's-complement negation, i.e. (0 - a) modulo 2^16.
static inline uint16_t _uint16_neg( uint16_t a )
{
  const uint16_t negated = (uint16_t)(0 - a);

  return negated;
}
174
+
175
// Complement (16-bit): flips every bit of a.
static inline uint16_t _uint16_not( uint16_t a )
{
  const uint16_t flipped = (uint16_t)(a ^ 0xffff);

  return flipped;
}
180
+
181
// Decrement (16-bit): a - 1, wrapping from 0 to 0xffff.
static inline uint16_t _uint16_dec( uint16_t a )
{
  const uint16_t predecessor = (uint16_t)(a - 1);

  return predecessor;
}
186
+
187
// Shift Left Logical (16-bit).
//
//   a   value to shift
//   sa  shift count
//
// Returns the low 16 bits of a shifted left by sa for 0 <= sa <= 31 (so any
// count of 16 or more yields 0), and 0 for every other count.
//
// The plain expression (a << sa) promotes a to signed int, which makes two
// uses in this file undefined behaviour: half_add shifts 1 left by up to 31
// (signed overflow) and passes a wrapped count of 0xffff on a path whose
// result is later discarded. The shift is therefore done in uint32_t with a
// masked count, and the result is forced to 0 when the count was out of range.
// The comparison yields 0 or 1, so no branch is required.
static inline uint16_t _uint16_sll( uint16_t a, int sa )
{
  const unsigned int count    = (unsigned int)sa;
  const uint32_t     in_range = (uint32_t)0 - (uint32_t)( count < 32u );
  const uint32_t     shifted  = ((uint32_t)a << (count & 31u)) & in_range;

  return (uint16_t)shifted;
}
192
+
193
// Shift Right Logical (16-bit): a moved right by sa bit positions, zero-filled
// from the top.
static inline uint16_t _uint16_srl( uint16_t a, int sa )
{
  const uint16_t shifted = (uint16_t)(a >> sa);

  return shifted;
}
198
+
199
// Add (16-bit): a + b modulo 2^16.
static inline uint16_t _uint16_add( uint16_t a, uint16_t b )
{
  const uint16_t sum = (uint16_t)(b + a);

  return sum;
}
204
+
205
// Subtract (16-bit): a - b modulo 2^16.
static inline uint16_t _uint16_sub( uint16_t a, uint16_t b )
{
  const uint16_t difference = (uint16_t)(a - b);

  return difference;
}
210
+
211
// And (16-bit): bits set in both a and b.
static inline uint16_t _uint16_and( uint16_t a, uint16_t b )
{
  const uint16_t common = (uint16_t)(b & a);

  return common;
}
216
+
217
// Or (16-bit): bits set in a, in b, or in both.
static inline uint16_t _uint16_or( uint16_t a, uint16_t b )
{
  const uint16_t merged = (uint16_t)(b | a);

  return merged;
}
222
+
223
// Exclusive Or (16-bit): bits set in exactly one of a and b.
static inline uint16_t _uint16_xor( uint16_t a, uint16_t b )
{
  const uint16_t differing = (uint16_t)(b ^ a);

  return differing;
}
228
+
229
// And with Complement (16-bit): bits of a that are NOT set in b.
static inline uint16_t _uint16_andc( uint16_t a, uint16_t b )
{
  const uint16_t b_cleared = (uint16_t)~b;
  const uint16_t kept      = (uint16_t)(a & b_cleared);

  return kept;
}
234
+
235
// And then Shift Right Logical (16-bit): masks a with b first, then moves the
// surviving bits right by sa positions.
static inline uint16_t _uint16_andsrl( uint16_t a, uint16_t b, int sa )
{
  const uint16_t masked  = (uint16_t)(a & b);
  const uint16_t shifted = (uint16_t)(masked >> sa);

  return shifted;
}
240
+
241
// Shift Right Logical then Mask (16-bit): moves a right by sa positions, then
// keeps only the bits that are set in mask.
static inline uint16_t _uint16_srlm( uint16_t a, int sa, uint16_t mask )
{
  const uint16_t shifted = (uint16_t)(a >> sa);
  const uint16_t kept    = (uint16_t)(shifted & mask);

  return kept;
}
246
+
247
// Add then Mask (16-bit): adds a and b, then keeps only the bits that are set
// in mask.
static inline uint16_t _uint16_addm( uint16_t a, uint16_t b, uint16_t mask )
{
  const int      sum  = a + b;
  const uint16_t kept = (uint16_t)(sum & mask);

  return kept;
}
252
+
253
+
254
// Select on Sign bit (16-bit): returns a when bit 15 of test is set,
// otherwise b. The sign bit is smeared into a full-width mask and used to
// blend the two candidates, so no branch is taken.
static inline uint16_t _uint16_sels( uint16_t test, uint16_t a, uint16_t b )
{
  const uint16_t pick_a = (uint16_t)( ((int16_t)test) >> 15 );

  // Start from b and flip exactly the masked bits that differ from a.
  return (uint16_t)(b ^ ((a ^ b) & pick_a));
}
264
+
265
// Count Leading Zeros (32-bit).
//
// Returns the number of zero bits above the highest set bit of x: 0 when
// bit 31 is set, 31 for x == 1, and 32 for x == 0.
static inline uint32_t _uint32_cntlz( uint32_t x )
{
#ifdef __GNUC__
  /* __builtin_clz is undefined for an argument of 0, so bit 0 is forced on  */
  /* before the call; this cannot change the count for any non-zero x.       */
  /* On PowerPC the builtin maps to cntlzw; on x86 to bsr or lzcnt.          */
  /*                                                                         */
  /* (x | -x) has bit 31 set exactly when x != 0. Testing -x alone is not    */
  /* enough: for x above 0x80000000 the negation has a clear sign bit.       */
  const uint32_t is_x_nez_msb = x | ((uint32_t)0 - x);
  const uint32_t is_x_nez     = (uint32_t)0 - (is_x_nez_msb >> 31);
  const uint32_t nlz          = (uint32_t)__builtin_clz( x | 1u );

  return ((nlz & is_x_nez) | (0x00000020u & ~is_x_nez));
#else
  /* Smear the highest set bit into every lower position ...                 */
  uint32_t smear = x;
  smear |= smear >> 1;
  smear |= smear >> 2;
  smear |= smear >> 4;
  smear |= smear >> 8;
  smear |= smear >> 16;

  /* ... so the complement holds one set bit per leading zero; count them    */
  /* with a parallel bit count.                                              */
  const uint32_t zeros  = ~smear;
  const uint32_t pairs  = zeros - ((zeros >> 1) & 0x55555555u);
  const uint32_t quads  = (pairs & 0x33333333u) + ((pairs >> 2) & 0x33333333u);
  const uint32_t octets = (quads + (quads >> 4)) & 0x0f0f0f0fu;
  const uint32_t halves = octets + (octets >> 8);
  const uint32_t total  = (halves + (halves >> 16)) & 0x0000003fu;

  return (total);
#endif
}
306
+
307
// Count Leading Zeros (16-bit): number of zero bits above the highest set bit
// of x, counted within 16 bits (16 when x is 0).
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  // Count in 32 bits, then drop the 16 zero bits introduced by widening x.
  const uint16_t wide_count   = (uint16_t)_uint32_cntlz( (uint32_t)x );
  const uint32_t narrow_count = (uint32_t)wide_count - 16u;

  return (narrow_count);
#else
  // Smear the highest set bit into every lower position ...
  uint16_t smear = x;
  smear = (uint16_t)( smear | (smear >> 1) );
  smear = (uint16_t)( smear | (smear >> 2) );
  smear = (uint16_t)( smear | (smear >> 4) );
  smear = (uint16_t)( smear | (smear >> 8) );

  // ... so the complement holds one set bit per leading zero; count them
  // with a parallel bit count.
  const uint16_t zeros  = (uint16_t)~smear;
  const uint16_t pairs  = (uint16_t)( zeros - ((zeros >> 1) & 0x5555) );
  const uint16_t quads  = (uint16_t)( (pairs & 0x3333) + ((pairs >> 2) & 0x3333) );
  const uint16_t octets = (uint16_t)( (quads + (quads >> 4)) & 0x0f0f );
  const uint16_t total  = (uint16_t)( (octets + (octets >> 8)) & 0x001f );

  return (total);
#endif
}
336
+
337
// half_from_float
// ---------------
//
// Converts the bit pattern of a 32-bit float (sign 0x80000000, exponent
// 0x7f800000, mantissa 0x007fffff) into a 16-bit half bit pattern.
//
//   f        float bit pattern
//   returns  half bit pattern: sign bit OR-ed with the selected
//            exponent/mantissa bits
//
// Every candidate result (normal, mantissa-rounding overflow, NaN, infinity,
// denormal, 0x7e00 NaN) is computed unconditionally. Each is_*_msb value is a
// flag carried in bit 31, and the chain of _uint32_sels() calls near the end
// picks the result; a select later in the chain overrides the earlier ones.
//
// NOTE(review): f_m_denorm_sa is 1 - (exponent - 0x70), so it ranges up to 113
// for small inputs and wraps to a huge value on the non-denormal path. It is
// passed as the count of _uint32_srl, whose plain `a >> sa` is undefined in C
// for counts outside 0..31 - confirm the denormal result for float exponents
// below 82.
//
// NOTE(review): f_h_e_biased_flag (0x8f) equals the largest value
// f_e_half_bias can take (0xff - 0x70), so 0x8f - f_e_half_bias does not
// appear to set bit 31 for any input; confirm whether is_f_e_flagged_msb
// (and the h_em_nan / h_nan_min selects that depend on it) can ever fire.
+ uint16_t
338
+ half_from_float( uint32_t f )
339
+ {
340
+ const uint32_t one = _uint32_li( 0x00000001 );
341
+ const uint32_t f_s_mask = _uint32_li( 0x80000000 );
342
+ const uint32_t f_e_mask = _uint32_li( 0x7f800000 );
343
+ const uint32_t f_m_mask = _uint32_li( 0x007fffff );
344
+ const uint32_t f_m_hidden_bit = _uint32_li( 0x00800000 );
345
+ const uint32_t f_m_round_bit = _uint32_li( 0x00001000 ); // bit 12: highest mantissa bit dropped by the 13-bit shift
346
+ const uint32_t f_snan_mask = _uint32_li( 0x7fc00000 ); // all exponent bits plus the top mantissa bit
347
+ const uint32_t f_e_pos = _uint32_li( 0x00000017 );
348
+ const uint32_t h_e_pos = _uint32_li( 0x0000000a );
349
+ const uint32_t h_e_mask = _uint32_li( 0x00007c00 );
350
+ const uint32_t h_snan_mask = _uint32_li( 0x00007e00 );
351
+ const uint32_t h_e_mask_value = _uint32_li( 0x0000001f );
352
+ const uint32_t f_h_s_pos_offset = _uint32_li( 0x00000010 ); // sign moves from bit 31 to bit 15
353
+ const uint32_t f_h_bias_offset = _uint32_li( 0x00000070 ); // 112 = float exponent bias (127) - half exponent bias (15)
354
+ const uint32_t f_h_m_pos_offset = _uint32_li( 0x0000000d ); // 23 float mantissa bits -> 10 half mantissa bits
355
+ const uint32_t h_nan_min = _uint32_li( 0x00007c01 );
356
+ const uint32_t f_h_e_biased_flag = _uint32_li( 0x0000008f );
357
+ const uint32_t f_s = _uint32_and( f, f_s_mask );
358
+ const uint32_t f_e = _uint32_and( f, f_e_mask );
359
+ const uint16_t h_s = _uint32_srl( f_s, f_h_s_pos_offset );
360
+ const uint32_t f_m = _uint32_and( f, f_m_mask );
361
+ const uint16_t f_e_amount = _uint32_srl( f_e, f_e_pos );
362
+ const uint32_t f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset ); // wraps (bit 31 set) when the float exponent is below 0x70
363
+ const uint32_t f_snan = _uint32_and( f, f_snan_mask );
364
+ const uint32_t f_m_round_mask = _uint32_and( f_m, f_m_round_bit );
365
+ const uint32_t f_m_round_offset = _uint32_sll( f_m_round_mask, one ); // bit 12 moved to bit 13: one unit of the lowest kept mantissa bit
366
+ const uint32_t f_m_rounded = _uint32_add( f_m, f_m_round_offset );
367
+ const uint32_t f_m_denorm_sa = _uint32_sub( one, f_e_half_bias ); // see NOTE(review) above: may exceed 31
368
+ const uint32_t f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit );
369
+ const uint32_t f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
370
+ const uint32_t h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset );
371
+ const uint32_t f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit ); // rounding carried out of the 23-bit mantissa
372
+ const uint32_t m_nan = _uint32_srl( f_m, f_h_m_pos_offset );
373
+ const uint32_t h_em_nan = _uint32_or( h_e_mask, m_nan );
374
+ const uint32_t h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias );
375
+ const uint32_t h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos );
376
+ const uint32_t h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos );
377
+ const uint32_t h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset );
378
+ const uint32_t h_em_norm = _uint32_or( h_e_norm, h_m_norm );
379
+ const uint32_t is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount ); // bit 31 set when the float exponent is above 0x70
380
+ const uint32_t is_f_e_flagged_msb = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
381
+ const uint32_t is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb );
382
+ const uint32_t is_f_m_eqz_msb = _uint32_dec( f_m ); // wraps to bit 31 set only when f_m == 0
383
+ const uint32_t is_h_nan_eqz_msb = _uint32_dec( m_nan );
384
+ const uint32_t is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb );
385
+ const uint32_t is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
386
+ const uint32_t is_e_overflow_msb = _uint32_sub( h_e_mask_value, f_e_half_bias ); // bit 31 set when the re-biased exponent exceeds 0x1f
387
+ const uint32_t is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb );
388
+ const uint32_t is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask ); // zero (bit 31 clear) only when all f_snan_mask bits are set in f
389
+ const uint32_t is_m_norm_overflow_msb = _uint32_neg( f_m_rounded_overflow );
390
+ const uint32_t is_f_snan_msb = _uint32_not( is_f_nsnan_msb );
391
+ const uint32_t h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm );
392
+ const uint32_t h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result );
393
+ const uint32_t h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result );
394
+ const uint32_t h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result );
395
+ const uint32_t h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result );
396
+ const uint32_t h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result );
397
+ const uint32_t h_result = _uint32_or( h_s, h_em_snan_result );
398
+
399
+ return (uint16_t)(h_result);
400
+ }
401
+
402
// half_to_float
// -------------
//
// Expands a 16-bit half bit pattern (sign 0x8000, exponent 0x7c00, mantissa
// 0x03ff) into a 32-bit float bit pattern.
//
//   h        half bit pattern
//   returns  float bit pattern: sign bit OR-ed with the selected
//            exponent/mantissa bits
//
// The re-biased normal encoding, the renormalized denormal encoding, infinity
// and NaN are all computed unconditionally. Each is_*_msb value is a flag
// carried in bit 31, and the chain of _uint32_sels() calls at the end picks
// the result; a select later in the chain overrides the earlier ones.
//
// NOTE(review): _uint32_cntlz( h_m ) is evaluated for every input, including
// h_m == 0; the GCC path of _uint32_cntlz hands its argument to
// __builtin_clz, which that function's own comment describes as undefined
// for 0 - confirm.
+ uint32_t
403
+ half_to_float( uint16_t h )
404
+ {
405
+ const uint32_t h_e_mask = _uint32_li( 0x00007c00 );
406
+ const uint32_t h_m_mask = _uint32_li( 0x000003ff );
407
+ const uint32_t h_s_mask = _uint32_li( 0x00008000 );
408
+ const uint32_t h_f_s_pos_offset = _uint32_li( 0x00000010 ); // sign moves from bit 15 to bit 31
409
+ const uint32_t h_f_e_pos_offset = _uint32_li( 0x0000000d ); // exponent moves from bit 10 to bit 23
410
+ const uint32_t h_f_bias_offset = _uint32_li( 0x0001c000 ); // 112 << 10: bias difference, applied while the exponent still sits at bit 10
411
+ const uint32_t f_e_mask = _uint32_li( 0x7f800000 );
412
+ const uint32_t f_m_mask = _uint32_li( 0x007fffff );
413
+ const uint32_t h_f_e_denorm_bias = _uint32_li( 0x0000007e );
414
+ const uint32_t h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
415
+ const uint32_t f_e_pos = _uint32_li( 0x00000017 );
416
+ const uint32_t h_e_mask_minus_one = _uint32_li( 0x00007bff );
417
+ const uint32_t h_e = _uint32_and( h, h_e_mask );
418
+ const uint32_t h_m = _uint32_and( h, h_m_mask );
419
+ const uint32_t h_s = _uint32_and( h, h_s_mask );
420
+ const uint32_t h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
421
+ const uint32_t h_m_nlz = _uint32_cntlz( h_m ); // see NOTE(review) above for h_m == 0
422
+ const uint32_t f_s = _uint32_sll( h_s, h_f_s_pos_offset );
423
+ const uint32_t f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
424
+ const uint32_t f_m = _uint32_sll( h_m, h_f_e_pos_offset );
425
+ const uint32_t f_em = _uint32_or( f_e, f_m );
426
+ const uint32_t h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias ); // shift that lifts the mantissa's top set bit to bit 23
427
+ const uint32_t f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
428
+ const uint32_t h_f_m = _uint32_sll( h_m, h_f_m_sa );
429
+ const uint32_t f_m_denorm = _uint32_and( h_f_m, f_m_mask ); // drops bit 23, which becomes the float's implicit leading bit
430
+ const uint32_t f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
431
+ const uint32_t f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
432
+ const uint32_t f_em_nan = _uint32_or( f_e_mask, f_m );
433
+ const uint32_t is_e_eqz_msb = _uint32_dec( h_e ); // wraps to bit 31 set only when h_e == 0
434
+ const uint32_t is_m_nez_msb = _uint32_neg( h_m ); // bit 31 set when h_m != 0 (h_m is at most 0x3ff)
435
+ const uint32_t is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e ); // bit 31 set only when every exponent bit is set
436
+ const uint32_t is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
437
+ const uint32_t is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb );
438
+ const uint32_t is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
439
+ const uint32_t is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb );
440
+ const uint32_t is_zero = _uint32_ext( is_zero_msb );
441
+ const uint32_t f_zero_result = _uint32_andc( f_em, is_zero ); // clears exponent and mantissa when the half is zero
442
+ const uint32_t f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
443
+ const uint32_t f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result );
444
+ const uint32_t f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result );
445
+ const uint32_t f_result = _uint32_or( f_s, f_nan_result );
446
+
447
+ return (f_result);
448
+ }
449
+
450
+ // half_add
451
+ // --------
452
+ //
453
+ // (SUM) uint16_t z = half_add( x, y );
454
+ // (DIFFERENCE) uint16_t z = half_add( x, y ^ 0x8000 ); // i.e. x + (-y): flip the sign bit of y
455
+ //
456
+ // * Difference of ZEROs is always +ZERO
457
+ // * Sum round with guard + round + sticky bit (grs)
458
+ // * QNaN + <x> = QNaN
459
+ // * <x> + +INF = +INF
460
+ // * <x> - -INF = -INF
461
+ // * INF - INF = SNaN
462
+ //
463
+ // Will have exactly (0 ulps difference) the same result as:
464
+ // (Round up)
465
+ //
466
+ // union FLOAT_32
467
+ // {
468
+ // float f32;
469
+ // uint32_t u32;
470
+ // };
471
+ //
472
+ // union FLOAT_32 fx = { .u32 = half_to_float( x ) };
473
+ // union FLOAT_32 fy = { .u32 = half_to_float( y ) };
474
+ // union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 };
475
+ // uint16_t z = half_from_float( fz.u32 );
476
+ //
477
// Adds two half bit patterns and returns the half bit pattern of the sum.
//
//   x, y     half bit patterns (sign 0x8000, exponent 0x7c00, mantissa 0x03ff)
//   returns  half bit pattern of the result
//
// The operands are first ordered so that `a` carries the larger exponent
// field and `b` the other. Both mantissas receive the hidden bit when their
// exponent is non-zero and are widened by h_grs_size (3) low bits; b is then
// shifted right by the exponent difference, with everything shifted out
// collapsed into one sticky bit. A sum and a magnitude difference are both
// computed, and the operand signs decide which one is used. Each is_*_msb
// value is a flag carried in bit 15 and consumed by _uint16_sels().
//
// NOTE(review): _uint16_sll( one, m_sa ) can be reached with m_sa up to 31
// (all-ones exponent against zero exponent), and diff_denorm_sa wraps to
// 0xffff when a_e_biased is 0; _uint16_sll's plain `a << sa` is evaluated in
// promoted signed int, where such shifts are undefined in C - confirm.
+ uint16_t
478
+ half_add( uint16_t x, uint16_t y )
479
+ {
480
+ const uint16_t one = _uint16_li( 0x0001 );
481
+ const uint16_t msb_to_lsb_sa = _uint16_li( 0x000f );
482
+ const uint16_t h_s_mask = _uint16_li( 0x8000 );
483
+ const uint16_t h_e_mask = _uint16_li( 0x7c00 );
484
+ const uint16_t h_m_mask = _uint16_li( 0x03ff );
485
+ const uint16_t h_m_msb_mask = _uint16_li( 0x2000 );
486
+ const uint16_t h_m_msb_sa = _uint16_li( 0x000d );
487
+ const uint16_t h_m_hidden = _uint16_li( 0x0400 );
488
+ const uint16_t h_e_pos = _uint16_li( 0x000a );
489
+ const uint16_t h_e_bias_minus_one = _uint16_li( 0x000e );
490
+ const uint16_t h_m_grs_carry = _uint16_li( 0x4000 ); // bit 14: one above the hidden bit once the 3 extra bits are appended
491
+ const uint16_t h_m_grs_carry_pos = _uint16_li( 0x000e );
492
+ const uint16_t h_grs_size = _uint16_li( 0x0003 );
493
+ const uint16_t h_snan = _uint16_li( 0xfe00 );
494
+ const uint16_t h_e_mask_minus_one = _uint16_li( 0x7bff );
495
+ const uint16_t h_grs_round_carry = _uint16_sll( one, h_grs_size );
496
+ const uint16_t h_grs_round_mask = _uint16_sub( h_grs_round_carry, one ); // 0x0007: the three appended low bits
497
+ const uint16_t x_e = _uint16_and( x, h_e_mask );
498
+ const uint16_t y_e = _uint16_and( y, h_e_mask );
499
+ const uint16_t is_y_e_larger_msb = _uint16_sub( x_e, y_e ); // wraps to bit 15 set when y_e > x_e
500
+ const uint16_t a = _uint16_sels( is_y_e_larger_msb, y, x); // a: operand with the larger exponent field
501
+ const uint16_t a_s = _uint16_and( a, h_s_mask );
502
+ const uint16_t a_e = _uint16_and( a, h_e_mask );
503
+ const uint16_t a_m_no_hidden_bit = _uint16_and( a, h_m_mask );
504
+ const uint16_t a_em_no_hidden_bit = _uint16_or( a_e, a_m_no_hidden_bit );
505
+ const uint16_t b = _uint16_sels( is_y_e_larger_msb, x, y); // b: the other operand
506
+ const uint16_t b_s = _uint16_and( b, h_s_mask );
507
+ const uint16_t b_e = _uint16_and( b, h_e_mask );
508
+ const uint16_t b_m_no_hidden_bit = _uint16_and( b, h_m_mask );
509
+ const uint16_t b_em_no_hidden_bit = _uint16_or( b_e, b_m_no_hidden_bit );
510
+ const uint16_t is_diff_sign_msb = _uint16_xor( a_s, b_s );
511
+ const uint16_t is_a_inf_msb = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit ); // bit 15 set when a's exponent bits are all ones
512
+ const uint16_t is_b_inf_msb = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit ); // bit 15 set when b's exponent bits are all ones
513
+ const uint16_t is_undenorm_msb = _uint16_dec( a_e ); // wraps to bit 15 set only when a_e == 0
514
+ const uint16_t is_undenorm = _uint16_ext( is_undenorm_msb );
515
+ const uint16_t is_both_inf_msb = _uint16_and( is_a_inf_msb, is_b_inf_msb );
516
+ const uint16_t is_invalid_inf_op_msb = _uint16_and( is_both_inf_msb, b_s );
517
+ const uint16_t is_a_e_nez_msb = _uint16_neg( a_e );
518
+ const uint16_t is_b_e_nez_msb = _uint16_neg( b_e );
519
+ const uint16_t is_a_e_nez = _uint16_ext( is_a_e_nez_msb );
520
+ const uint16_t is_b_e_nez = _uint16_ext( is_b_e_nez_msb );
521
+ const uint16_t a_m_hidden_bit = _uint16_and( is_a_e_nez, h_m_hidden );
522
+ const uint16_t b_m_hidden_bit = _uint16_and( is_b_e_nez, h_m_hidden );
523
+ const uint16_t a_m_no_grs = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit );
524
+ const uint16_t b_m_no_grs = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit );
525
+ const uint16_t diff_e = _uint16_sub( a_e, b_e );
526
+ const uint16_t a_e_unbias = _uint16_sub( a_e, h_e_bias_minus_one );
527
+ const uint16_t a_m = _uint16_sll( a_m_no_grs, h_grs_size ); // append the 3 low guard/round/sticky bits
528
+ const uint16_t a_e_biased = _uint16_srl( a_e, h_e_pos );
529
+ const uint16_t m_sa_unbias = _uint16_srl( a_e_unbias, h_e_pos );
530
+ const uint16_t m_sa_default = _uint16_srl( diff_e, h_e_pos );
531
+ const uint16_t m_sa_unbias_mask = _uint16_andc( is_a_e_nez_msb, is_b_e_nez_msb ); // bit 15 set when a_e != 0 and b_e == 0
532
+ const uint16_t m_sa = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default );
533
+ const uint16_t b_m_no_sticky = _uint16_sll( b_m_no_grs, h_grs_size );
534
+ const uint16_t sh_m = _uint16_srl( b_m_no_sticky, m_sa );
535
+ const uint16_t sticky_overflow = _uint16_sll( one, m_sa ); // see NOTE(review) above: m_sa may reach 31
536
+ const uint16_t sticky_mask = _uint16_dec( sticky_overflow );
537
+ const uint16_t sticky_collect = _uint16_and( b_m_no_sticky, sticky_mask ); // the bits of b that the alignment shift drops
538
+ const uint16_t is_sticky_set_msb = _uint16_neg( sticky_collect );
539
+ const uint16_t sticky = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa); // 1 when any dropped bit was set, else 0
540
+ const uint16_t b_m = _uint16_or( sh_m, sticky );
541
+ const uint16_t is_c_m_ab_pos_msb = _uint16_sub( b_m, a_m );
542
+ const uint16_t c_inf = _uint16_or( a_s, h_e_mask );
543
+ const uint16_t c_m_sum = _uint16_add( a_m, b_m );
544
+ const uint16_t c_m_diff_ab = _uint16_sub( a_m, b_m );
545
+ const uint16_t c_m_diff_ba = _uint16_sub( b_m, a_m );
546
+ const uint16_t c_m_smag_diff = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba );
547
+ const uint16_t c_s_diff = _uint16_sels( is_c_m_ab_pos_msb, a_s, b_s );
548
+ const uint16_t c_s = _uint16_sels( is_diff_sign_msb, c_s_diff, a_s );
549
+ const uint16_t c_m_smag_diff_nlz = _uint16_cntlz( c_m_smag_diff );
550
+ const uint16_t diff_norm_sa = _uint16_sub( c_m_smag_diff_nlz, one );
551
+ const uint16_t is_diff_denorm_msb = _uint16_sub( a_e_biased, diff_norm_sa );
552
+ const uint16_t is_diff_denorm = _uint16_ext( is_diff_denorm_msb );
553
+ const uint16_t is_a_or_b_norm_msb = _uint16_neg( a_e_biased );
554
+ const uint16_t diff_denorm_sa = _uint16_dec( a_e_biased ); // see NOTE(review) above: wraps to 0xffff when a_e_biased == 0
555
+ const uint16_t c_m_diff_denorm = _uint16_sll( c_m_smag_diff, diff_denorm_sa );
556
+ const uint16_t c_m_diff_norm = _uint16_sll( c_m_smag_diff, diff_norm_sa );
557
+ const uint16_t c_e_diff_norm = _uint16_sub( a_e_biased, diff_norm_sa );
558
+ const uint16_t c_m_diff_ab_norm = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm );
559
+ const uint16_t c_e_diff_ab_norm = _uint16_andc( c_e_diff_norm, is_diff_denorm );
560
+ const uint16_t c_m_diff = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff );
561
+ const uint16_t c_e_diff = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased );
562
+ const uint16_t is_diff_eqz_msb = _uint16_dec( c_m_diff );
563
+ const uint16_t is_diff_exactly_zero_msb = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb );
564
+ const uint16_t is_diff_exactly_zero = _uint16_ext( is_diff_exactly_zero_msb );
565
+ const uint16_t c_m_added = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum ); // differing signs take the difference, equal signs the sum
566
+ const uint16_t c_e_added = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased );
567
+ const uint16_t c_m_carry = _uint16_and( c_m_added, h_m_grs_carry );
568
+ const uint16_t is_c_m_carry_msb = _uint16_neg( c_m_carry );
569
+ const uint16_t c_e_hidden_offset = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos );
570
+ const uint16_t c_m_sub_hidden = _uint16_srl( c_m_added, one );
571
+ const uint16_t c_m_no_hidden = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added );
572
+ const uint16_t c_e_no_hidden = _uint16_add( c_e_added, c_e_hidden_offset );
573
+ const uint16_t c_m_no_hidden_msb = _uint16_and( c_m_no_hidden, h_m_msb_mask );
574
+ const uint16_t undenorm_m_msb_odd = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa );
575
+ const uint16_t undenorm_fix_e = _uint16_and( is_undenorm, undenorm_m_msb_odd );
576
+ const uint16_t c_e_fixed = _uint16_add( c_e_no_hidden, undenorm_fix_e );
577
+ const uint16_t c_m_round_amount = _uint16_and( c_m_no_hidden, h_grs_round_mask );
578
+ const uint16_t c_m_rounded = _uint16_add( c_m_no_hidden, c_m_round_amount );
579
+ const uint16_t c_m_round_overflow = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos );
580
+ const uint16_t c_e_rounded = _uint16_add( c_e_fixed, c_m_round_overflow );
581
+ const uint16_t c_m_no_grs = _uint16_srlm( c_m_rounded, h_grs_size, h_m_mask ); // drop the 3 extra bits and the hidden bit
582
+ const uint16_t c_e = _uint16_sll( c_e_rounded, h_e_pos );
583
+ const uint16_t c_em = _uint16_or( c_e, c_m_no_grs );
584
+ const uint16_t c_normal = _uint16_or( c_s, c_em );
585
+ const uint16_t c_inf_result = _uint16_sels( is_a_inf_msb, c_inf, c_normal );
586
+ const uint16_t c_zero_result = _uint16_andc( c_inf_result, is_diff_exactly_zero ); // clears every bit, sign included, when operands of opposite sign cancel
587
+ const uint16_t c_result = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result ); // both exponents all ones and b's sign bit set -> h_snan
588
+
589
+ return (c_result);
590
+ }
591
+
592
+ // half_mul
593
+ // --------
594
+ //
595
+ // May have 0 or 1 ulp difference from the following result:
596
+ // (Round to nearest)
597
+ // NOTE: Rounding mode differs between conversion and multiply
598
+ //
599
+ // union FLOAT_32
600
+ // {
601
+ // float f32;
602
+ // uint32_t u32;
603
+ // };
604
+ //
605
+ // union FLOAT_32 fx = { .u32 = half_to_float( x ) };
606
+ // union FLOAT_32 fy = { .u32 = half_to_float( y ) };
607
+ // union FLOAT_32 fz = { .f32 = fx.f32 * fy.f32 };
608
+ // uint16_t z = half_from_float( fz.u32 );
609
+ //
610
+ uint16_t
611
+ half_mul( uint16_t x, uint16_t y )
612
+ {
613
+ const uint32_t one = _uint32_li( 0x00000001 );
614
+ const uint32_t h_s_mask = _uint32_li( 0x00008000 );
615
+ const uint32_t h_e_mask = _uint32_li( 0x00007c00 );
616
+ const uint32_t h_m_mask = _uint32_li( 0x000003ff );
617
+ const uint32_t h_m_hidden = _uint32_li( 0x00000400 );
618
+ const uint32_t h_e_pos = _uint32_li( 0x0000000a );
619
+ const uint32_t h_e_bias = _uint32_li( 0x0000000f );
620
+ const uint32_t h_m_bit_count = _uint32_li( 0x0000000a );
621
+ const uint32_t h_m_bit_half_count = _uint32_li( 0x00000005 );
622
+ const uint32_t h_nan_min = _uint32_li( 0x00007c01 );
623
+ const uint32_t h_e_mask_minus_one = _uint32_li( 0x00007bff );
624
+ const uint32_t h_snan = _uint32_li( 0x0000fe00 );
625
+ const uint32_t m_round_overflow_bit = _uint32_li( 0x00000020 );
626
+ const uint32_t m_hidden_bit = _uint32_li( 0x00100000 );
627
+ const uint32_t a_s = _uint32_and( x, h_s_mask );
628
+ const uint32_t b_s = _uint32_and( y, h_s_mask );
629
+ const uint32_t c_s = _uint32_xor( a_s, b_s );
630
+ const uint32_t x_e = _uint32_and( x, h_e_mask );
631
+ const uint32_t x_e_eqz_msb = _uint32_dec( x_e );
632
+ const uint32_t a = _uint32_sels( x_e_eqz_msb, y, x );
633
+ const uint32_t b = _uint32_sels( x_e_eqz_msb, x, y );
634
+ const uint32_t a_e = _uint32_and( a, h_e_mask );
635
+ const uint32_t b_e = _uint32_and( b, h_e_mask );
636
+ const uint32_t a_m = _uint32_and( a, h_m_mask );
637
+ const uint32_t b_m = _uint32_and( b, h_m_mask );
638
+ const uint32_t a_e_amount = _uint32_srl( a_e, h_e_pos );
639
+ const uint32_t b_e_amount = _uint32_srl( b_e, h_e_pos );
640
+ const uint32_t a_m_with_hidden = _uint32_or( a_m, h_m_hidden );
641
+ const uint32_t b_m_with_hidden = _uint32_or( b_m, h_m_hidden );
642
+ const uint32_t c_m_normal = _uint32_mul( a_m_with_hidden, b_m_with_hidden );
643
+ const uint32_t c_m_denorm_biased = _uint32_mul( a_m_with_hidden, b_m );
644
+ const uint32_t c_e_denorm_unbias_e = _uint32_sub( h_e_bias, a_e_amount );
645
+ const uint32_t c_m_denorm_round_amount = _uint32_and( c_m_denorm_biased, h_m_mask );
646
+ const uint32_t c_m_denorm_rounded = _uint32_add( c_m_denorm_biased, c_m_denorm_round_amount );
647
+ const uint32_t c_m_denorm_inplace = _uint32_srl( c_m_denorm_rounded, h_m_bit_count );
648
+ const uint32_t c_m_denorm_unbiased = _uint32_srl( c_m_denorm_inplace, c_e_denorm_unbias_e );
649
+ const uint32_t c_m_denorm = _uint32_and( c_m_denorm_unbiased, h_m_mask );
650
+ const uint32_t c_e_amount_biased = _uint32_add( a_e_amount, b_e_amount );
651
+ const uint32_t c_e_amount_unbiased = _uint32_sub( c_e_amount_biased, h_e_bias );
652
+ const uint32_t is_c_e_unbiased_underflow = _uint32_ext( c_e_amount_unbiased );
653
+ const uint32_t c_e_underflow_half_sa = _uint32_neg( c_e_amount_unbiased );
654
+ const uint32_t c_e_underflow_sa = _uint32_sll( c_e_underflow_half_sa, one );
655
+ const uint32_t c_m_underflow = _uint32_srl( c_m_normal, c_e_underflow_sa );
656
+ const uint32_t c_e_underflow_added = _uint32_andc( c_e_amount_unbiased, is_c_e_unbiased_underflow );
657
+ const uint32_t c_m_underflow_added = _uint32_selb( is_c_e_unbiased_underflow, c_m_underflow, c_m_normal );
658
+ const uint32_t is_mul_overflow_test = _uint32_and( c_e_underflow_added, m_round_overflow_bit );
659
+ const uint32_t is_mul_overflow_msb = _uint32_neg( is_mul_overflow_test );
660
+ const uint32_t c_e_norm_radix_corrected = _uint32_inc( c_e_underflow_added );
661
+ const uint32_t c_m_norm_radix_corrected = _uint32_srl( c_m_underflow_added, one );
662
+ const uint32_t c_m_norm_hidden_bit = _uint32_and( c_m_norm_radix_corrected, m_hidden_bit );
663
+ const uint32_t is_c_m_norm_no_hidden_msb = _uint32_dec( c_m_norm_hidden_bit );
664
+ const uint32_t c_m_norm_lo = _uint32_srl( c_m_norm_radix_corrected, h_m_bit_half_count );
665
+ const uint32_t c_m_norm_lo_nlz = _uint16_cntlz( c_m_norm_lo );
666
+ const uint32_t is_c_m_hidden_nunderflow_msb = _uint32_sub( c_m_norm_lo_nlz, c_e_norm_radix_corrected );
667
+ const uint32_t is_c_m_hidden_underflow_msb = _uint32_not( is_c_m_hidden_nunderflow_msb );
668
+ const uint32_t is_c_m_hidden_underflow = _uint32_ext( is_c_m_hidden_underflow_msb );
669
+ const uint32_t c_m_hidden_underflow_normalized_sa = _uint32_srl( c_m_norm_lo_nlz, one );
670
+ const uint32_t c_m_hidden_underflow_normalized = _uint32_sll( c_m_norm_radix_corrected, c_m_hidden_underflow_normalized_sa );
671
+ const uint32_t c_m_hidden_normalized = _uint32_sll( c_m_norm_radix_corrected, c_m_norm_lo_nlz );
672
+ const uint32_t c_e_hidden_normalized = _uint32_sub( c_e_norm_radix_corrected, c_m_norm_lo_nlz );
673
+ const uint32_t c_e_hidden = _uint32_andc( c_e_hidden_normalized, is_c_m_hidden_underflow );
674
+ const uint32_t c_m_hidden = _uint32_sels( is_c_m_hidden_underflow_msb, c_m_hidden_underflow_normalized, c_m_hidden_normalized );
675
+ const uint32_t c_m_normalized = _uint32_sels( is_c_m_norm_no_hidden_msb, c_m_hidden, c_m_norm_radix_corrected );
676
+ const uint32_t c_e_normalized = _uint32_sels( is_c_m_norm_no_hidden_msb, c_e_hidden, c_e_norm_radix_corrected );
677
+ const uint32_t c_m_norm_round_amount = _uint32_and( c_m_normalized, h_m_mask );
678
+ const uint32_t c_m_norm_rounded = _uint32_add( c_m_normalized, c_m_norm_round_amount );
679
+ const uint32_t is_round_overflow_test = _uint32_and( c_e_normalized, m_round_overflow_bit );
680
+ const uint32_t is_round_overflow_msb = _uint32_neg( is_round_overflow_test );
681
+ const uint32_t c_m_norm_inplace = _uint32_srl( c_m_norm_rounded, h_m_bit_count );
682
+ const uint32_t c_m = _uint32_and( c_m_norm_inplace, h_m_mask );
683
+ const uint32_t c_e_norm_inplace = _uint32_sll( c_e_normalized, h_e_pos );
684
+ const uint32_t c_e = _uint32_and( c_e_norm_inplace, h_e_mask );
685
+ const uint32_t c_em_nan = _uint32_or( h_e_mask, a_m );
686
+ const uint32_t c_nan = _uint32_or( a_s, c_em_nan );
687
+ const uint32_t c_denorm = _uint32_or( c_s, c_m_denorm );
688
+ const uint32_t c_inf = _uint32_or( c_s, h_e_mask );
689
+ const uint32_t c_em_norm = _uint32_or( c_e, c_m );
690
+ const uint32_t is_a_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, a_e );
691
+ const uint32_t is_b_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, b_e );
692
+ const uint32_t is_a_e_eqz_msb = _uint32_dec( a_e );
693
+ const uint32_t is_a_m_eqz_msb = _uint32_dec( a_m );
694
+ const uint32_t is_b_e_eqz_msb = _uint32_dec( b_e );
695
+ const uint32_t is_b_m_eqz_msb = _uint32_dec( b_m );
696
+ const uint32_t is_b_eqz_msb = _uint32_and( is_b_e_eqz_msb, is_b_m_eqz_msb );
697
+ const uint32_t is_a_eqz_msb = _uint32_and( is_a_e_eqz_msb, is_a_m_eqz_msb );
698
+ const uint32_t is_c_nan_via_a_msb = _uint32_andc( is_a_e_flagged_msb, is_b_e_flagged_msb );
699
+ const uint32_t is_c_nan_via_b_msb = _uint32_andc( is_b_e_flagged_msb, is_b_m_eqz_msb );
700
+ const uint32_t is_c_nan_msb = _uint32_or( is_c_nan_via_a_msb, is_c_nan_via_b_msb );
701
+ const uint32_t is_c_denorm_msb = _uint32_andc( is_b_e_eqz_msb, is_a_e_flagged_msb );
702
+ const uint32_t is_a_inf_msb = _uint32_and( is_a_e_flagged_msb, is_a_m_eqz_msb );
703
+ const uint32_t is_c_snan_msb = _uint32_and( is_a_inf_msb, is_b_eqz_msb );
704
+ const uint32_t is_c_nan_min_via_a_msb = _uint32_and( is_a_e_flagged_msb, is_b_eqz_msb );
705
+ const uint32_t is_c_nan_min_via_b_msb = _uint32_and( is_b_e_flagged_msb, is_a_eqz_msb );
706
+ const uint32_t is_c_nan_min_msb = _uint32_or( is_c_nan_min_via_a_msb, is_c_nan_min_via_b_msb );
707
+ const uint32_t is_c_inf_msb = _uint32_or( is_a_e_flagged_msb, is_b_e_flagged_msb );
708
+ const uint32_t is_overflow_msb = _uint32_or( is_round_overflow_msb, is_mul_overflow_msb );
709
+ const uint32_t c_em_overflow_result = _uint32_sels( is_overflow_msb, h_e_mask, c_em_norm );
710
+ const uint32_t c_common_result = _uint32_or( c_s, c_em_overflow_result );
711
+ const uint32_t c_zero_result = _uint32_sels( is_b_eqz_msb, c_s, c_common_result );
712
+ const uint32_t c_nan_result = _uint32_sels( is_c_nan_msb, c_nan, c_zero_result );
713
+ const uint32_t c_nan_min_result = _uint32_sels( is_c_nan_min_msb, h_nan_min, c_nan_result );
714
+ const uint32_t c_inf_result = _uint32_sels( is_c_inf_msb, c_inf, c_nan_min_result );
715
+ const uint32_t c_denorm_result = _uint32_sels( is_c_denorm_msb, c_denorm, c_inf_result);
716
+ const uint32_t c_result = _uint32_sels( is_c_snan_msb, h_snan, c_denorm_result );
717
+
718
+ return (uint16_t)(c_result);
719
+ }