libbin 0.9.0 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +0 -0
- data/ext/libbin/extconf.rb +3 -0
- data/ext/libbin/half.c +719 -0
- data/ext/libbin/half.h +18 -0
- data/ext/libbin/libbin_c.c +65 -0
- data/ext/libbin/pghalf.c +449 -0
- data/ext/libbin/pghalf.h +9 -0
- data/lib/libbin.rb +51 -54
- data/lib/libbin/alignment.rb +0 -0
- data/lib/libbin/data_types.rb +63 -35
- data/libbin.gemspec +3 -2
- metadata +10 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 230be2238c11ec4505d14cea8a43181b6b006cd3db0eccc4d40d1fb117912681
|
4
|
+
data.tar.gz: c21f1a9ab288be527d14eeea80f248f9332d98d3dc8351dd811536aaa24850f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a8fbca741eb24035f80751e3eaf49a3b1b89ac1dbb8f4ae9cec3b58c77ddaf2e8b93c74af781cde5756e07d4a4fd115d07e703a8b5f8ba537da104c82f4a845d
|
7
|
+
data.tar.gz: 307b2a33d3d6d3357bc8c59079dfa0bbdb62a9e189894abde7d9b32bcf180c42b5614dcdb4ccdcebdc593c40e32afe59c3ad018db50a3ecff8568c833034778e
|
data/LICENSE
CHANGED
File without changes
|
data/ext/libbin/half.c
ADDED
@@ -0,0 +1,719 @@
|
|
1
|
+
// Branch-free implementation of half-precision (16 bit) floating point
|
2
|
+
// Copyright 2006 Mike Acton <macton@gmail.com>
|
3
|
+
//
|
4
|
+
// Permission is hereby granted, free of charge, to any person obtaining a
|
5
|
+
// copy of this software and associated documentation files (the "Software"),
|
6
|
+
// to deal in the Software without restriction, including without limitation
|
7
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
8
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
9
|
+
// Software is furnished to do so, subject to the following conditions:
|
10
|
+
//
|
11
|
+
// The above copyright notice and this permission notice shall be included
|
12
|
+
// in all copies or substantial portions of the Software.
|
13
|
+
//
|
14
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
20
|
+
// THE SOFTWARE
|
21
|
+
//
|
22
|
+
// Half-precision floating point format
|
23
|
+
// ------------------------------------
|
24
|
+
//
|
25
|
+
// | Field | Last | First | Note
|
26
|
+
// |----------|------|-------|----------
|
27
|
+
// | Sign | 15 | 15 |
|
28
|
+
// | Exponent | 14 | 10 | Bias = 15
|
29
|
+
// | Mantissa | 9 | 0 |
|
30
|
+
//
|
31
|
+
// Compiling
|
32
|
+
// ---------
|
33
|
+
//
|
34
|
+
// Preferred compile flags for GCC:
|
35
|
+
// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
|
36
|
+
//
|
37
|
+
// This file is a C99 source file, intended to be compiled with a C99
|
38
|
+
// compliant compiler. However, for the moment it remains compatible
|
39
|
+
// with C++98. Therefore if you are using a compiler that poorly implements
|
40
|
+
// C standards (e.g. MSVC), it may be compiled as C++. This is not
|
41
|
+
// guaranteed for future versions.
|
42
|
+
//
|
43
|
+
|
44
|
+
#include "half.h"
|
45
|
+
|
46
|
+
// Load immediate: hands back the 32-bit value it was given, unchanged.
static inline uint32_t _uint32_li( uint32_t a )
{
  const uint32_t value = a;

  return value;
}
|
51
|
+
|
52
|
+
// Decrement: a - 1 modulo 2^32 (0 wraps to 0xffffffff).
static inline uint32_t _uint32_dec( uint32_t a )
{
  uint32_t prev = a;

  prev -= 1u;
  return prev;
}
|
57
|
+
|
58
|
+
// Increment: a + 1 modulo 2^32 (0xffffffff wraps to 0).
static inline uint32_t _uint32_inc( uint32_t a )
{
  uint32_t next = a;

  next += 1u;
  return next;
}
|
63
|
+
|
64
|
+
// Complement: flips every one of the 32 bits of a.
static inline uint32_t _uint32_not( uint32_t a )
{
  const uint32_t all_ones = UINT32_C( 0xffffffff );

  return a ^ all_ones;
}
|
69
|
+
|
70
|
+
// Negate: two's-complement negation of a, computed modulo 2^32.
static inline uint32_t _uint32_neg( uint32_t a )
{
  const uint32_t flipped = ~a;

  return flipped + 1u;
}
|
75
|
+
|
76
|
+
// Extend sign: replicates bit 31 of a across the whole word.
//
// Returns 0xffffffff when bit 31 of a is set and 0 otherwise. The result is
// meant to be used as an all-or-nothing select mask.
//
// Implemented with unsigned arithmetic only: converting an out-of-range
// uint32_t to int32_t and right-shifting a negative value are both
// implementation-defined in C, so the signed-shift formulation is avoided.
static inline uint32_t _uint32_ext( uint32_t a )
{
  const uint32_t sign_bit = a >> 31;          // 0 or 1

  return (uint32_t)0 - sign_bit;              // 0 or 0xffffffff
}
|
81
|
+
|
82
|
+
// And: bitwise AND of the two operands.
static inline uint32_t _uint32_and( uint32_t a, uint32_t b )
{
  uint32_t bits = a;

  bits &= b;
  return bits;
}
|
87
|
+
|
88
|
+
// Exclusive Or: bitwise XOR of the two operands.
static inline uint32_t _uint32_xor( uint32_t a, uint32_t b )
{
  uint32_t bits = a;

  bits ^= b;
  return bits;
}
|
93
|
+
|
94
|
+
// And with Complement: keeps the bits of a that are NOT set in b.
static inline uint32_t _uint32_andc( uint32_t a, uint32_t b )
{
  const uint32_t keep = ~b;

  return a & keep;
}
|
99
|
+
|
100
|
+
// Or: bitwise OR of the two operands.
static inline uint32_t _uint32_or( uint32_t a, uint32_t b )
{
  uint32_t bits = a;

  bits |= b;
  return bits;
}
|
105
|
+
|
106
|
+
// Shift Right Logical
//
// Returns a shifted right by sa bit positions, filling with zeros.
//
// A count outside 0..31 (including a negative sa, or an unsigned value that
// wrapped before being passed in) yields 0. A bare `a >> sa` is undefined in C
// for such counts, and this file's converters do pass them: half_from_float()
// computes its denormal shift as 1 - (exponent - 0x70), which exceeds 31 for
// very small inputs. On hardware that masks the count, the bare shift then
// produces a non-zero result where every bit should have been shifted out.
//
// The range check is folded into a mask so the helper stays free of
// source-level branches.
static inline uint32_t _uint32_srl( uint32_t a, int sa )
{
  const uint32_t count    = (uint32_t)sa;                           // negative sa becomes a huge count
  const uint32_t in_range = (uint32_t)0 - (uint32_t)( count < 32u ); // all ones when 0 <= sa <= 31

  return ( a >> ( count & 31u ) ) & in_range;
}
|
111
|
+
|
112
|
+
// Shift Left Logical
//
// Returns a shifted left by sa bit positions, discarding bits that leave the
// 32-bit word.
//
// A count outside 0..31 (including a negative sa, or an unsigned value that
// wrapped before being passed in) yields 0 instead of invoking the undefined
// behaviour of a bare `a << sa`. Callers in this file compute shift counts
// arithmetically and discard the result afterwards when the count was not
// meaningful, so the shift itself must be well defined for every count.
//
// The range check is folded into a mask so the helper stays free of
// source-level branches.
static inline uint32_t _uint32_sll( uint32_t a, int sa )
{
  const uint32_t count    = (uint32_t)sa;                           // negative sa becomes a huge count
  const uint32_t in_range = (uint32_t)0 - (uint32_t)( count < 32u ); // all ones when 0 <= sa <= 31

  return ( a << ( count & 31u ) ) & in_range;
}
|
117
|
+
|
118
|
+
// Add: a + b modulo 2^32.
static inline uint32_t _uint32_add( uint32_t a, uint32_t b )
{
  uint32_t sum = a;

  sum += b;
  return sum;
}
|
123
|
+
|
124
|
+
// Subtract: a - b modulo 2^32 (a negative difference wraps and sets bit 31).
static inline uint32_t _uint32_sub( uint32_t a, uint32_t b )
{
  uint32_t difference = a;

  difference -= b;
  return difference;
}
|
129
|
+
|
130
|
+
// Multiply: low 32 bits of the product a * b.
static inline uint32_t _uint32_mul( uint32_t a, uint32_t b )
{
  uint32_t product = a;

  product *= b;
  return product;
}
|
135
|
+
|
136
|
+
// Select on Sign bit
//
// Returns a when bit 31 of test is set, otherwise b. The decision is made by
// spreading the sign bit of test into a full-width mask and blending the two
// candidates with it, so no branch is needed.
static inline uint32_t _uint32_sels( uint32_t test, uint32_t a, uint32_t b )
{
  const uint32_t pick_a = (uint32_t)( ( (int32_t)test ) >> 31 );

  return ( a & pick_a ) | ( b & ~pick_a );
}
|
146
|
+
|
147
|
+
// Select Bits on mask
//
// Bitwise blend: each result bit comes from a where the corresponding mask bit
// is 1 and from b where it is 0.
static inline uint32_t _uint32_selb( uint32_t mask, uint32_t a, uint32_t b )
{
  const uint32_t from_a = mask & a;
  const uint32_t from_b = ~mask & b;

  return from_a | from_b;
}
|
156
|
+
|
157
|
+
// Load Immediate: hands back the 16-bit value it was given, unchanged.
static inline uint16_t _uint16_li( uint16_t a )
{
  const uint16_t value = a;

  return value;
}
|
162
|
+
|
163
|
+
// Extend sign: replicates bit 15 of a across the whole 16-bit word.
//
// Returns 0xffff when bit 15 of a is set and 0 otherwise. The result is meant
// to be used as an all-or-nothing select mask.
//
// Implemented with unsigned arithmetic only: converting an out-of-range
// uint16_t to int16_t and right-shifting a negative value are both
// implementation-defined in C, so the signed-shift formulation is avoided.
static inline uint16_t _uint16_ext( uint16_t a )
{
  const uint32_t sign_bit = (uint32_t)a >> 15;   // 0 or 1

  return (uint16_t)( 0u - sign_bit );            // 0 or 0xffff after truncation
}
|
168
|
+
|
169
|
+
// Negate: two's-complement negation of a, truncated to 16 bits.
static inline uint16_t _uint16_neg( uint16_t a )
{
  const uint32_t wide = (uint32_t)a;

  return (uint16_t)( 0u - wide );
}
|
174
|
+
|
175
|
+
// Complement: flips every one of the 16 bits of a.
static inline uint16_t _uint16_not( uint16_t a )
{
  const uint32_t flipped = (uint32_t)a ^ 0xffffu;

  return (uint16_t)flipped;
}
|
180
|
+
|
181
|
+
// Decrement: a - 1 modulo 2^16 (0 wraps to 0xffff).
static inline uint16_t _uint16_dec( uint16_t a )
{
  const uint32_t wide = (uint32_t)a;

  return (uint16_t)( wide - 1u );
}
|
186
|
+
|
187
|
+
// Shift Left Logical
//
// Returns a shifted left by sa bit positions, truncated to 16 bits.
//
// A count outside 0..15 (including a negative sa) yields 0. The plain
// `a << sa` form promotes a to int first, so `1 << 31` overflows a signed int
// and counts such as 0xffff are undefined. half_add() does produce such counts
// (for example `a_e_biased - 1` when a_e_biased is 0) and only discards the
// result afterwards. For counts 16..31 a well-defined plain shift would
// already truncate to 0, so previously valid results are unchanged.
//
// The shift is done in unsigned arithmetic and the range check is folded into
// a mask, keeping the helper free of source-level branches.
static inline uint16_t _uint16_sll( uint16_t a, int sa )
{
  const uint32_t count    = (uint32_t)sa;                           // negative sa becomes a huge count
  const uint32_t in_range = (uint32_t)0 - (uint32_t)( count < 16u ); // all ones when 0 <= sa <= 15
  const uint32_t shifted  = ( (uint32_t)a << ( count & 15u ) ) & in_range;

  return (uint16_t)shifted;
}
|
192
|
+
|
193
|
+
// Shift Right Logical
//
// Returns a shifted right by sa bit positions, filling with zeros.
//
// A count outside 0..15 (including a negative sa) yields 0. The plain
// `a >> sa` form is undefined once the count reaches the width of the promoted
// operand or is negative. For counts 16..31 a well-defined plain shift already
// produced 0, so previously valid results are unchanged.
//
// The range check is folded into a mask, keeping the helper free of
// source-level branches.
static inline uint16_t _uint16_srl( uint16_t a, int sa )
{
  const uint32_t count    = (uint32_t)sa;                           // negative sa becomes a huge count
  const uint32_t in_range = (uint32_t)0 - (uint32_t)( count < 16u ); // all ones when 0 <= sa <= 15
  const uint32_t shifted  = ( (uint32_t)a >> ( count & 15u ) ) & in_range;

  return (uint16_t)shifted;
}
|
198
|
+
|
199
|
+
// Add: a + b modulo 2^16.
static inline uint16_t _uint16_add( uint16_t a, uint16_t b )
{
  const uint32_t sum = (uint32_t)a + (uint32_t)b;

  return (uint16_t)sum;
}
|
204
|
+
|
205
|
+
// Subtract: a - b modulo 2^16 (a negative difference wraps and sets bit 15).
static inline uint16_t _uint16_sub( uint16_t a, uint16_t b )
{
  const uint32_t difference = (uint32_t)a - (uint32_t)b;

  return (uint16_t)difference;
}
|
210
|
+
|
211
|
+
// And: bitwise AND of the two operands.
static inline uint16_t _uint16_and( uint16_t a, uint16_t b )
{
  uint16_t bits = a;

  bits &= b;
  return bits;
}
|
216
|
+
|
217
|
+
// Or: bitwise OR of the two operands.
static inline uint16_t _uint16_or( uint16_t a, uint16_t b )
{
  uint16_t bits = a;

  bits |= b;
  return bits;
}
|
222
|
+
|
223
|
+
// Exclusive Or: bitwise XOR of the two operands.
static inline uint16_t _uint16_xor( uint16_t a, uint16_t b )
{
  uint16_t bits = a;

  bits ^= b;
  return bits;
}
|
228
|
+
|
229
|
+
// And with Complement: keeps the bits of a that are NOT set in b.
static inline uint16_t _uint16_andc( uint16_t a, uint16_t b )
{
  const uint32_t keep = ~(uint32_t)b;

  return (uint16_t)( (uint32_t)a & keep );
}
|
234
|
+
|
235
|
+
// And then Shift Right Logical: masks a with b, then shifts the surviving
// bits right by sa positions.
static inline uint16_t _uint16_andsrl( uint16_t a, uint16_t b, int sa )
{
  const uint16_t masked = (uint16_t)( a & b );

  return (uint16_t)( masked >> sa );
}
|
240
|
+
|
241
|
+
// Shift Right Logical then Mask: shifts a right by sa positions, then keeps
// only the bits that are set in mask.
static inline uint16_t _uint16_srlm( uint16_t a, int sa, uint16_t mask )
{
  const uint16_t shifted = (uint16_t)( a >> sa );

  return (uint16_t)( shifted & mask );
}
|
246
|
+
|
247
|
+
// Add then Mask: forms a + b, then keeps only the bits that are set in mask.
static inline uint16_t _uint16_addm( uint16_t a, uint16_t b, uint16_t mask )
{
  const uint32_t sum = (uint32_t)a + (uint32_t)b;

  return (uint16_t)( sum & mask );
}
|
252
|
+
|
253
|
+
|
254
|
+
// Select on Sign bit
//
// Returns a when bit 15 of test is set, otherwise b. The decision is made by
// spreading the sign bit of test into a full-width mask and blending the two
// candidates with it, so no branch is needed.
static inline uint16_t _uint16_sels( uint16_t test, uint16_t a, uint16_t b )
{
  const uint16_t pick_a = (uint16_t)( ( (int16_t)test ) >> 15 );
  const uint16_t from_a = (uint16_t)( a & pick_a );
  const uint16_t from_b = (uint16_t)( b & ~pick_a );

  return (uint16_t)( from_a | from_b );
}
|
264
|
+
|
265
|
+
// Count Leading Zeros
//
// Returns the number of zero bits above the most significant set bit of x:
// 0 when bit 31 is set, 31 when only bit 0 is set, and 32 when x is 0.
static inline uint32_t _uint32_cntlz( uint32_t x )
{
#ifdef __GNUC__
  /* __builtin_clz is undefined for an argument of 0, so it is given (x | 1):
     setting bit 0 never changes the leading-zero count of a non-zero x and
     guarantees the argument is non-zero. The x == 0 case is replaced by 32
     through the mask blend below. */
  const uint32_t nlz_if_nonzero = (uint32_t)__builtin_clz( x | 1u );

  /* (x | -x) has bit 31 set for every non-zero x. Testing -x alone is not
     enough: for x above 0x80000000 the negation has bit 31 clear. */
  const uint32_t x_or_negx      = x | ( (uint32_t)0 - x );
  const uint32_t is_nonzero     = (uint32_t)0 - ( x_or_negx >> 31 );   // all ones iff x != 0

  return ( nlz_if_nonzero & is_nonzero ) | ( 0x00000020u & ~is_nonzero );
#else
  /* Portable path: smear the highest set bit into every lower position, so
     the complement has ones exactly where x had leading zeros, then count
     those ones with a parallel population count. */
  uint32_t smear = x;
  smear |= smear >> 1;
  smear |= smear >> 2;
  smear |= smear >> 4;
  smear |= smear >> 8;
  smear |= smear >> 16;

  uint32_t ones = ~smear;
  ones  = ones - ( ( ones >> 1 ) & 0x55555555u );                 // 2-bit sums
  ones  = ( ones & 0x33333333u ) + ( ( ones >> 2 ) & 0x33333333u ); // 4-bit sums
  ones  = ( ones + ( ones >> 4 ) ) & 0x0f0f0f0fu;                 // 8-bit sums
  ones += ones >> 8;
  ones += ones >> 16;

  return ( ones & 0x0000003fu );
#endif
}
|
306
|
+
|
307
|
+
// Count Leading Zeros (16-bit)
//
// Returns the number of zero bits above the most significant set bit of the
// 16-bit value x; the result is 16 when x is 0.
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  // Count in 32 bits, then subtract the 16 extra zero bits the widening added.
  const uint32_t wide_count   = _uint32_cntlz( (uint32_t)x );
  const uint16_t narrow_count = (uint16_t)wide_count;

  return (uint16_t)_uint32_sub( narrow_count, 16 );
#else
  // Smear the highest set bit into every lower position ...
  uint16_t smear = x;
  smear = (uint16_t)( smear | ( smear >> 1 ) );
  smear = (uint16_t)( smear | ( smear >> 2 ) );
  smear = (uint16_t)( smear | ( smear >> 4 ) );
  smear = (uint16_t)( smear | ( smear >> 8 ) );

  // ... so the complement has ones exactly where x had leading zeros; count
  // them with a parallel population count.
  const uint16_t ones    = (uint16_t)~smear;
  const uint16_t pairs   = (uint16_t)( ones - ( ( ones >> 1 ) & 0x5555 ) );
  const uint16_t nibbles = (uint16_t)( ( pairs & 0x3333 ) + ( ( pairs >> 2 ) & 0x3333 ) );
  const uint16_t bytes   = (uint16_t)( ( nibbles + ( nibbles >> 4 ) ) & 0x0f0f );

  return (uint16_t)( ( bytes + ( bytes >> 8 ) ) & 0x001f );
#endif
}
|
336
|
+
|
337
|
+
// half_from_float
// ---------------
//
// Converts the bit pattern of a 32-bit float (f: sign bit 31, exponent bits
// 30..23, mantissa bits 22..0, as fixed by the f_* masks below) into the bit
// pattern of a 16-bit half (sign bit 15, exponent bits 14..10, mantissa bits
// 9..0) and returns it.
//
// No branches are used: every candidate result (normal, mantissa-overflow,
// NaN, infinity, denormal) is computed unconditionally, and the *_msb values
// are predicates whose bit 31 drives the _uint32_sels() chain at the end.
// Later selects in that chain override earlier ones.
//
// Rounding: when mantissa bit 12 (the highest bit that does not fit in the
// half mantissa) is set, 0x2000 is added to the mantissa before truncation.
uint16_t
|
338
|
+
half_from_float( uint32_t f )
|
339
|
+
{
|
340
|
+
// Masks, bit positions and constants for the float (f_*) and half (h_*) layouts.
const uint32_t one = _uint32_li( 0x00000001 );
|
341
|
+
const uint32_t f_s_mask = _uint32_li( 0x80000000 );
|
342
|
+
const uint32_t f_e_mask = _uint32_li( 0x7f800000 );
|
343
|
+
const uint32_t f_m_mask = _uint32_li( 0x007fffff );
|
344
|
+
const uint32_t f_m_hidden_bit = _uint32_li( 0x00800000 );
|
345
|
+
const uint32_t f_m_round_bit = _uint32_li( 0x00001000 );
|
346
|
+
const uint32_t f_snan_mask = _uint32_li( 0x7fc00000 );
|
347
|
+
const uint32_t f_e_pos = _uint32_li( 0x00000017 );
|
348
|
+
const uint32_t h_e_pos = _uint32_li( 0x0000000a );
|
349
|
+
const uint32_t h_e_mask = _uint32_li( 0x00007c00 );
|
350
|
+
const uint32_t h_snan_mask = _uint32_li( 0x00007e00 );
|
351
|
+
const uint32_t h_e_mask_value = _uint32_li( 0x0000001f );
|
352
|
+
const uint32_t f_h_s_pos_offset = _uint32_li( 0x00000010 );
|
353
|
+
const uint32_t f_h_bias_offset = _uint32_li( 0x00000070 );
|
354
|
+
const uint32_t f_h_m_pos_offset = _uint32_li( 0x0000000d );
|
355
|
+
const uint32_t h_nan_min = _uint32_li( 0x00007c01 );
|
356
|
+
const uint32_t f_h_e_biased_flag = _uint32_li( 0x0000008f );
|
357
|
+
// Split the input into sign / exponent / mantissa fields. The sign is moved
// from bit 31 to bit 15, and the exponent is rebased by subtracting 0x70; the
// subtraction wraps (bit 31 set) when the float exponent is below 0x70.
const uint32_t f_s = _uint32_and( f, f_s_mask );
|
358
|
+
const uint32_t f_e = _uint32_and( f, f_e_mask );
|
359
|
+
const uint16_t h_s = _uint32_srl( f_s, f_h_s_pos_offset );
|
360
|
+
const uint32_t f_m = _uint32_and( f, f_m_mask );
|
361
|
+
const uint16_t f_e_amount = _uint32_srl( f_e, f_e_pos );
|
362
|
+
const uint32_t f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset );
|
363
|
+
const uint32_t f_snan = _uint32_and( f, f_snan_mask );
|
364
|
+
// Rounding: (round bit << 1) is 0x2000 when mantissa bit 12 is set, else 0.
const uint32_t f_m_round_mask = _uint32_and( f_m, f_m_round_bit );
|
365
|
+
const uint32_t f_m_round_offset = _uint32_sll( f_m_round_mask, one );
|
366
|
+
const uint32_t f_m_rounded = _uint32_add( f_m, f_m_round_offset );
|
367
|
+
// Denormal candidate: restore the hidden bit, shift right by
// (1 - rebased exponent), then move the mantissa into half position.
// NOTE(review): f_m_denorm_sa exceeds 31 for small exponents and wraps for
// large ones; _uint32_srl() is a bare `a >> sa`, which C leaves undefined for
// such counts - confirm the intended result on the target compiler/CPU.
const uint32_t f_m_denorm_sa = _uint32_sub( one, f_e_half_bias );
|
368
|
+
const uint32_t f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit );
|
369
|
+
const uint32_t f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
|
370
|
+
const uint32_t h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset );
|
371
|
+
// Non-zero when rounding carried out of the 23-bit mantissa into bit 23.
const uint32_t f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit );
|
372
|
+
// NaN candidate: all-ones half exponent with the top 10 float mantissa bits.
const uint32_t m_nan = _uint32_srl( f_m, f_h_m_pos_offset );
|
373
|
+
const uint32_t h_em_nan = _uint32_or( h_e_mask, m_nan );
|
374
|
+
// Normal candidates: rebased exponent in half position, with and without the
// +1 exponent bump used when mantissa rounding overflowed.
const uint32_t h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias );
|
375
|
+
const uint32_t h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos );
|
376
|
+
const uint32_t h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos );
|
377
|
+
const uint32_t h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset );
|
378
|
+
const uint32_t h_em_norm = _uint32_or( h_e_norm, h_m_norm );
|
379
|
+
// Predicates. Each *_msb value carries its truth value in bit 31 only.
// is_h_denorm_msb is set when the float exponent is <= 0x70.
const uint32_t is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount );
|
380
|
+
// NOTE(review): f_e_amount is at most 0xff, so f_e_half_bias is either at most
// 0x8f or a wrapped value >= 0xffffff90; in both cases this difference appears
// never to have bit 31 set - confirm whether the selects keyed on
// is_f_e_flagged_msb are reachable.
const uint32_t is_f_e_flagged_msb = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
|
381
|
+
const uint32_t is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb );
|
382
|
+
const uint32_t is_f_m_eqz_msb = _uint32_dec( f_m );
|
383
|
+
const uint32_t is_h_nan_eqz_msb = _uint32_dec( m_nan );
|
384
|
+
const uint32_t is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb );
|
385
|
+
const uint32_t is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
|
386
|
+
// Set when the rebased exponent is larger than 0x1f (the largest half exponent field).
const uint32_t is_e_overflow_msb = _uint32_sub( h_e_mask_value, f_e_half_bias );
|
387
|
+
const uint32_t is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb );
|
388
|
+
// is_f_snan_msb is set exactly when (f & 0x7fc00000) == 0x7fc00000: the
// difference below is zero only in that case, and its complement is all ones.
const uint32_t is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask );
|
389
|
+
const uint32_t is_m_norm_overflow_msb = _uint32_neg( f_m_rounded_overflow );
|
390
|
+
const uint32_t is_f_snan_msb = _uint32_not( is_f_nsnan_msb );
|
391
|
+
// Selection chain: start from the normal result and let each more specific
// case replace it; the last select whose predicate is set wins.
const uint32_t h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm );
|
392
|
+
const uint32_t h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result );
|
393
|
+
const uint32_t h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result );
|
394
|
+
const uint32_t h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result );
|
395
|
+
const uint32_t h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result );
|
396
|
+
const uint32_t h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result );
|
397
|
+
// Re-attach the sign bit to the chosen exponent/mantissa.
const uint32_t h_result = _uint32_or( h_s, h_em_snan_result );
|
398
|
+
|
399
|
+
return (uint16_t)(h_result);
|
400
|
+
}
|
401
|
+
|
402
|
+
// half_to_float
// -------------
//
// Converts the bit pattern of a 16-bit half (h: sign bit 15, exponent bits
// 14..10, mantissa bits 9..0, as fixed by the h_* masks below) into the bit
// pattern of a 32-bit float and returns it.
//
// No branches are used: the normal, denormal, infinity and NaN candidates are
// all computed, and the *_msb predicates (truth value in bit 31) drive the
// _uint32_sels() chain at the end. Later selects override earlier ones.
uint32_t
|
403
|
+
half_to_float( uint16_t h )
|
404
|
+
{
|
405
|
+
// Masks, bit positions and constants for the half (h_*) and float (f_*) layouts.
const uint32_t h_e_mask = _uint32_li( 0x00007c00 );
|
406
|
+
const uint32_t h_m_mask = _uint32_li( 0x000003ff );
|
407
|
+
const uint32_t h_s_mask = _uint32_li( 0x00008000 );
|
408
|
+
const uint32_t h_f_s_pos_offset = _uint32_li( 0x00000010 );
|
409
|
+
const uint32_t h_f_e_pos_offset = _uint32_li( 0x0000000d );
|
410
|
+
// 0x1c000 is 0x70 << 10: the exponent rebias, applied while the exponent is
// still in its half bit position.
const uint32_t h_f_bias_offset = _uint32_li( 0x0001c000 );
|
411
|
+
const uint32_t f_e_mask = _uint32_li( 0x7f800000 );
|
412
|
+
const uint32_t f_m_mask = _uint32_li( 0x007fffff );
|
413
|
+
const uint32_t h_f_e_denorm_bias = _uint32_li( 0x0000007e );
|
414
|
+
const uint32_t h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
|
415
|
+
const uint32_t f_e_pos = _uint32_li( 0x00000017 );
|
416
|
+
const uint32_t h_e_mask_minus_one = _uint32_li( 0x00007bff );
|
417
|
+
// Split the input into exponent, mantissa and sign fields (still in place).
const uint32_t h_e = _uint32_and( h, h_e_mask );
|
418
|
+
const uint32_t h_m = _uint32_and( h, h_m_mask );
|
419
|
+
const uint32_t h_s = _uint32_and( h, h_s_mask );
|
420
|
+
const uint32_t h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
|
421
|
+
const uint32_t h_m_nlz = _uint32_cntlz( h_m );
|
422
|
+
// Normal candidate: move sign, rebased exponent and mantissa up to their
// float positions (sign by 16 bits, exponent and mantissa by 13 bits).
const uint32_t f_s = _uint32_sll( h_s, h_f_s_pos_offset );
|
423
|
+
const uint32_t f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
|
424
|
+
const uint32_t f_m = _uint32_sll( h_m, h_f_e_pos_offset );
|
425
|
+
const uint32_t f_em = _uint32_or( f_e, f_m );
|
426
|
+
// Denormal candidate: shifting the mantissa left by (leading zeros - 8) puts
// its highest set bit at bit 23, where f_m_mask then drops it as the implicit
// leading one; the float exponent is 0x7e minus that shift count.
const uint32_t h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
|
427
|
+
const uint32_t f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
|
428
|
+
const uint32_t h_f_m = _uint32_sll( h_m, h_f_m_sa );
|
429
|
+
const uint32_t f_m_denorm = _uint32_and( h_f_m, f_m_mask );
|
430
|
+
const uint32_t f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
|
431
|
+
const uint32_t f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
|
432
|
+
// NaN candidate: all-ones float exponent with the shifted half mantissa.
const uint32_t f_em_nan = _uint32_or( f_e_mask, f_m );
|
433
|
+
// Predicates (truth value in bit 31): exponent field is zero, mantissa is
// non-zero, exponent field is all ones (0x7bff - h_e wraps only for 0x7c00).
const uint32_t is_e_eqz_msb = _uint32_dec( h_e );
|
434
|
+
const uint32_t is_m_nez_msb = _uint32_neg( h_m );
|
435
|
+
const uint32_t is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
|
436
|
+
const uint32_t is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
|
437
|
+
const uint32_t is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb );
|
438
|
+
const uint32_t is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
|
439
|
+
const uint32_t is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb );
|
440
|
+
// Zero input: clear every exponent/mantissa bit of the normal candidate.
const uint32_t is_zero = _uint32_ext( is_zero_msb );
|
441
|
+
const uint32_t f_zero_result = _uint32_andc( f_em, is_zero );
|
442
|
+
// Selection chain: denormal, then infinity, then NaN replace the normal/zero
// result when their predicate is set.
const uint32_t f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
|
443
|
+
const uint32_t f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result );
|
444
|
+
const uint32_t f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result );
|
445
|
+
// Re-attach the sign bit.
const uint32_t f_result = _uint32_or( f_s, f_nan_result );
|
446
|
+
|
447
|
+
return (f_result);
|
448
|
+
}
|
449
|
+
|
450
|
+
// half_add
|
451
|
+
// --------
|
452
|
+
//
|
453
|
+
// (SUM) uint16_t z = half_add( x, y );
|
454
|
+
// (DIFFERENCE) uint16_t z = half_add( x, -y );
|
455
|
+
//
|
456
|
+
// * Difference of ZEROs is always +ZERO
|
457
|
+
// * Sum round with guard + round + sticky bit (grs)
|
458
|
+
// * QNaN + <x> = QNaN
|
459
|
+
// * <x> + +INF = +INF
|
460
|
+
// * <x> - -INF = -INF
|
461
|
+
// * INF - INF = SNaN
|
462
|
+
//
|
463
|
+
// Will have exactly (0 ulps difference) the same result as:
|
464
|
+
// (Round up)
|
465
|
+
//
|
466
|
+
// union FLOAT_32
|
467
|
+
// {
|
468
|
+
// float f32;
|
469
|
+
// uint32_t u32;
|
470
|
+
// };
|
471
|
+
//
|
472
|
+
// union FLOAT_32 fx = { .u32 = half_to_float( x ) };
|
473
|
+
// union FLOAT_32 fy = { .u32 = half_to_float( y ) };
|
474
|
+
// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 };
|
475
|
+
// uint16_t z = half_from_float( fz.u32 );
|
476
|
+
//
|
477
|
+
// Adds two half-precision values given as 16-bit patterns (x, y) and returns
// the 16-bit pattern of the result.
//
// Outline of the branch-free computation below:
//  * the operand with the larger exponent field becomes `a`, the other `b`;
//  * both mantissas get their hidden bit (when the exponent field is
//    non-zero) and are widened by 3 low bits (h_grs_size);
//  * b's mantissa is shifted right to align with a's, and any bits shifted
//    out are folded into its lowest bit (sticky);
//  * equal signs select the mantissa sum, different signs the magnitude
//    difference, which is then renormalised using a leading-zero count;
//  * the 3 low bits are added back into the mantissa as the rounding step and
//    then dropped;
//  * infinity, exact-zero and invalid-infinity results override the normal
//    result through the final selects.
// Values named *_msb are predicates whose truth value is in bit 15.
uint16_t
|
478
|
+
half_add( uint16_t x, uint16_t y )
|
479
|
+
{
|
480
|
+
// Masks, bit positions and constants for the half layout.
const uint16_t one = _uint16_li( 0x0001 );
|
481
|
+
const uint16_t msb_to_lsb_sa = _uint16_li( 0x000f );
|
482
|
+
const uint16_t h_s_mask = _uint16_li( 0x8000 );
|
483
|
+
const uint16_t h_e_mask = _uint16_li( 0x7c00 );
|
484
|
+
const uint16_t h_m_mask = _uint16_li( 0x03ff );
|
485
|
+
const uint16_t h_m_msb_mask = _uint16_li( 0x2000 );
|
486
|
+
const uint16_t h_m_msb_sa = _uint16_li( 0x000d );
|
487
|
+
const uint16_t h_m_hidden = _uint16_li( 0x0400 );
|
488
|
+
const uint16_t h_e_pos = _uint16_li( 0x000a );
|
489
|
+
const uint16_t h_e_bias_minus_one = _uint16_li( 0x000e );
|
490
|
+
const uint16_t h_m_grs_carry = _uint16_li( 0x4000 );
|
491
|
+
const uint16_t h_m_grs_carry_pos = _uint16_li( 0x000e );
|
492
|
+
const uint16_t h_grs_size = _uint16_li( 0x0003 );
|
493
|
+
const uint16_t h_snan = _uint16_li( 0xfe00 );
|
494
|
+
const uint16_t h_e_mask_minus_one = _uint16_li( 0x7bff );
|
495
|
+
// (1 << 3) - 1 = 0x7: mask covering the 3 extra low mantissa bits.
const uint16_t h_grs_round_carry = _uint16_sll( one, h_grs_size );
|
496
|
+
const uint16_t h_grs_round_mask = _uint16_sub( h_grs_round_carry, one );
|
497
|
+
// Order the operands: x_e - y_e wraps (bit 15 set) when y's exponent field is
// larger, in which case a = y and b = x; otherwise a = x and b = y.
const uint16_t x_e = _uint16_and( x, h_e_mask );
|
498
|
+
const uint16_t y_e = _uint16_and( y, h_e_mask );
|
499
|
+
const uint16_t is_y_e_larger_msb = _uint16_sub( x_e, y_e );
|
500
|
+
const uint16_t a = _uint16_sels( is_y_e_larger_msb, y, x);
|
501
|
+
const uint16_t a_s = _uint16_and( a, h_s_mask );
|
502
|
+
const uint16_t a_e = _uint16_and( a, h_e_mask );
|
503
|
+
const uint16_t a_m_no_hidden_bit = _uint16_and( a, h_m_mask );
|
504
|
+
const uint16_t a_em_no_hidden_bit = _uint16_or( a_e, a_m_no_hidden_bit );
|
505
|
+
const uint16_t b = _uint16_sels( is_y_e_larger_msb, x, y);
|
506
|
+
const uint16_t b_s = _uint16_and( b, h_s_mask );
|
507
|
+
const uint16_t b_e = _uint16_and( b, h_e_mask );
|
508
|
+
const uint16_t b_m_no_hidden_bit = _uint16_and( b, h_m_mask );
|
509
|
+
const uint16_t b_em_no_hidden_bit = _uint16_or( b_e, b_m_no_hidden_bit );
|
510
|
+
// Bit 15 set when the operand signs differ.
const uint16_t is_diff_sign_msb = _uint16_xor( a_s, b_s );
|
511
|
+
// 0x7bff - (exponent|mantissa) wraps when the exponent field is all ones.
const uint16_t is_a_inf_msb = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit );
|
512
|
+
const uint16_t is_b_inf_msb = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit );
|
513
|
+
// a_e - 1 wraps only when a's exponent field is zero.
const uint16_t is_undenorm_msb = _uint16_dec( a_e );
|
514
|
+
const uint16_t is_undenorm = _uint16_ext( is_undenorm_msb );
|
515
|
+
const uint16_t is_both_inf_msb = _uint16_and( is_a_inf_msb, is_b_inf_msb );
|
516
|
+
// Set when both operands have an all-ones exponent field and b is negative.
const uint16_t is_invalid_inf_op_msb = _uint16_and( is_both_inf_msb, b_s );
|
517
|
+
// Add the hidden bit (0x0400) to each mantissa whose exponent field is non-zero.
const uint16_t is_a_e_nez_msb = _uint16_neg( a_e );
|
518
|
+
const uint16_t is_b_e_nez_msb = _uint16_neg( b_e );
|
519
|
+
const uint16_t is_a_e_nez = _uint16_ext( is_a_e_nez_msb );
|
520
|
+
const uint16_t is_b_e_nez = _uint16_ext( is_b_e_nez_msb );
|
521
|
+
const uint16_t a_m_hidden_bit = _uint16_and( is_a_e_nez, h_m_hidden );
|
522
|
+
const uint16_t b_m_hidden_bit = _uint16_and( is_b_e_nez, h_m_hidden );
|
523
|
+
const uint16_t a_m_no_grs = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit );
|
524
|
+
const uint16_t b_m_no_grs = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit );
|
525
|
+
// Alignment shift for b: the exponent difference by default, or the value
// derived from a_e_unbias when a has a non-zero exponent field and b does not.
const uint16_t diff_e = _uint16_sub( a_e, b_e );
|
526
|
+
const uint16_t a_e_unbias = _uint16_sub( a_e, h_e_bias_minus_one );
|
527
|
+
const uint16_t a_m = _uint16_sll( a_m_no_grs, h_grs_size );
|
528
|
+
const uint16_t a_e_biased = _uint16_srl( a_e, h_e_pos );
|
529
|
+
const uint16_t m_sa_unbias = _uint16_srl( a_e_unbias, h_e_pos );
|
530
|
+
const uint16_t m_sa_default = _uint16_srl( diff_e, h_e_pos );
|
531
|
+
const uint16_t m_sa_unbias_mask = _uint16_andc( is_a_e_nez_msb, is_b_e_nez_msb );
|
532
|
+
const uint16_t m_sa = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default );
|
533
|
+
// Align b and collect the bits that fall off into a single sticky bit (bit 0).
const uint16_t b_m_no_sticky = _uint16_sll( b_m_no_grs, h_grs_size );
|
534
|
+
const uint16_t sh_m = _uint16_srl( b_m_no_sticky, m_sa );
|
535
|
+
const uint16_t sticky_overflow = _uint16_sll( one, m_sa );
|
536
|
+
const uint16_t sticky_mask = _uint16_dec( sticky_overflow );
|
537
|
+
const uint16_t sticky_collect = _uint16_and( b_m_no_sticky, sticky_mask );
|
538
|
+
const uint16_t is_sticky_set_msb = _uint16_neg( sticky_collect );
|
539
|
+
const uint16_t sticky = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa);
|
540
|
+
const uint16_t b_m = _uint16_or( sh_m, sticky );
|
541
|
+
// b_m - a_m wraps (bit 15 set) when a's mantissa is the larger one.
const uint16_t is_c_m_ab_pos_msb = _uint16_sub( b_m, a_m );
|
542
|
+
const uint16_t c_inf = _uint16_or( a_s, h_e_mask );
|
543
|
+
// Sum for equal signs; for different signs, the non-negative magnitude
// difference together with the sign of the larger-magnitude operand.
const uint16_t c_m_sum = _uint16_add( a_m, b_m );
|
544
|
+
const uint16_t c_m_diff_ab = _uint16_sub( a_m, b_m );
|
545
|
+
const uint16_t c_m_diff_ba = _uint16_sub( b_m, a_m );
|
546
|
+
const uint16_t c_m_smag_diff = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba );
|
547
|
+
const uint16_t c_s_diff = _uint16_sels( is_c_m_ab_pos_msb, a_s, b_s );
|
548
|
+
const uint16_t c_s = _uint16_sels( is_diff_sign_msb, c_s_diff, a_s );
|
549
|
+
// Renormalise the difference: shift left by (leading zeros - 1) and lower the
// exponent by the same amount, unless that would take the exponent below zero
// (is_diff_denorm), in which case the shift is limited to (exponent - 1) and
// the exponent is cleared.
const uint16_t c_m_smag_diff_nlz = _uint16_cntlz( c_m_smag_diff );
|
550
|
+
const uint16_t diff_norm_sa = _uint16_sub( c_m_smag_diff_nlz, one );
|
551
|
+
const uint16_t is_diff_denorm_msb = _uint16_sub( a_e_biased, diff_norm_sa );
|
552
|
+
const uint16_t is_diff_denorm = _uint16_ext( is_diff_denorm_msb );
|
553
|
+
const uint16_t is_a_or_b_norm_msb = _uint16_neg( a_e_biased );
|
554
|
+
// NOTE(review): when a_e_biased is 0 this wraps to 0xffff and is then used as
// a shift count by _uint16_sll() (a bare `a << sa`); the shifted value is
// discarded by the is_a_or_b_norm_msb select, but the shift itself is outside
// what C defines - confirm on the target compiler.
const uint16_t diff_denorm_sa = _uint16_dec( a_e_biased );
|
555
|
+
const uint16_t c_m_diff_denorm = _uint16_sll( c_m_smag_diff, diff_denorm_sa );
|
556
|
+
const uint16_t c_m_diff_norm = _uint16_sll( c_m_smag_diff, diff_norm_sa );
|
557
|
+
const uint16_t c_e_diff_norm = _uint16_sub( a_e_biased, diff_norm_sa );
|
558
|
+
const uint16_t c_m_diff_ab_norm = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm );
|
559
|
+
const uint16_t c_e_diff_ab_norm = _uint16_andc( c_e_diff_norm, is_diff_denorm );
|
560
|
+
const uint16_t c_m_diff = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff );
|
561
|
+
const uint16_t c_e_diff = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased );
|
562
|
+
// Signs differ and the difference mantissa is zero: the result is forced to
// all-zero bits further down.
const uint16_t is_diff_eqz_msb = _uint16_dec( c_m_diff );
|
563
|
+
const uint16_t is_diff_exactly_zero_msb = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb );
|
564
|
+
const uint16_t is_diff_exactly_zero = _uint16_ext( is_diff_exactly_zero_msb );
|
565
|
+
// Pick difference or sum depending on whether the signs differ.
const uint16_t c_m_added = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum );
|
566
|
+
const uint16_t c_e_added = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased );
|
567
|
+
// A carry into bit 14 means the mantissa grew by one bit: shift it back right
// by one and add one to the exponent.
const uint16_t c_m_carry = _uint16_and( c_m_added, h_m_grs_carry );
|
568
|
+
const uint16_t is_c_m_carry_msb = _uint16_neg( c_m_carry );
|
569
|
+
const uint16_t c_e_hidden_offset = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos );
|
570
|
+
const uint16_t c_m_sub_hidden = _uint16_srl( c_m_added, one );
|
571
|
+
const uint16_t c_m_no_hidden = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added );
|
572
|
+
const uint16_t c_e_no_hidden = _uint16_add( c_e_added, c_e_hidden_offset );
|
573
|
+
// When a had a zero exponent field and the result reached the hidden-bit
// position (bit 13 here), add one to the exponent.
const uint16_t c_m_no_hidden_msb = _uint16_and( c_m_no_hidden, h_m_msb_mask );
|
574
|
+
const uint16_t undenorm_m_msb_odd = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa );
|
575
|
+
const uint16_t undenorm_fix_e = _uint16_and( is_undenorm, undenorm_m_msb_odd );
|
576
|
+
const uint16_t c_e_fixed = _uint16_add( c_e_no_hidden, undenorm_fix_e );
|
577
|
+
// Rounding: add the 3 low bits to the mantissa, propagate a resulting carry
// into the exponent, then drop the 3 low bits.
const uint16_t c_m_round_amount = _uint16_and( c_m_no_hidden, h_grs_round_mask );
|
578
|
+
const uint16_t c_m_rounded = _uint16_add( c_m_no_hidden, c_m_round_amount );
|
579
|
+
const uint16_t c_m_round_overflow = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos );
|
580
|
+
const uint16_t c_e_rounded = _uint16_add( c_e_fixed, c_m_round_overflow );
|
581
|
+
const uint16_t c_m_no_grs = _uint16_srlm( c_m_rounded, h_grs_size, h_m_mask );
|
582
|
+
// Reassemble sign, exponent and mantissa, then apply the special cases:
// a with an all-ones exponent gives signed infinity, an exact-zero difference
// clears every bit, and the invalid-infinity case returns h_snan (0xfe00).
const uint16_t c_e = _uint16_sll( c_e_rounded, h_e_pos );
|
583
|
+
const uint16_t c_em = _uint16_or( c_e, c_m_no_grs );
|
584
|
+
const uint16_t c_normal = _uint16_or( c_s, c_em );
|
585
|
+
const uint16_t c_inf_result = _uint16_sels( is_a_inf_msb, c_inf, c_normal );
|
586
|
+
const uint16_t c_zero_result = _uint16_andc( c_inf_result, is_diff_exactly_zero );
|
587
|
+
const uint16_t c_result = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result );
|
588
|
+
|
589
|
+
return (c_result);
|
590
|
+
}
|
591
|
+
|
592
|
+
// half_mul
|
593
|
+
// --------
|
594
|
+
//
|
595
|
+
// May have 0 or 1 ulp difference from the following result:
|
596
|
+
// (Round to nearest)
|
597
|
+
// NOTE: Rounding mode differs between conversion and multiply
|
598
|
+
//
|
599
|
+
// union FLOAT_32
|
600
|
+
// {
|
601
|
+
// float f32;
|
602
|
+
// uint32_t u32;
|
603
|
+
// };
|
604
|
+
//
|
605
|
+
// union FLOAT_32 fx = { .u32 = half_to_float( x ) };
|
606
|
+
// union FLOAT_32 fy = { .u32 = half_to_float( y ) };
|
607
|
+
// union FLOAT_32 fz = { .f32 = fx.f32 * fy.f32 };
|
608
|
+
// uint16_t z = half_from_float( fz.u32 );
|
609
|
+
//
|
610
|
+
// Multiply two half-precision values supplied as raw 16-bit patterns and
// return the product as a raw 16-bit pattern.
//
// The body contains no branches of its own: it is a straight line of
// single-assignment temporaries. Every candidate outcome (normal product,
// denormal, infinity, NaN variants, signed zero) is computed
// unconditionally, and the value returned is picked by the chain of
// _uint32_sels() calls at the bottom.
//
// NOTE(review): the _uint32_* and _uint16_cntlz helpers are defined
// elsewhere in this file. The comments below assume that temporaries named
// "*_msb" carry their truth value in the top bit, and that
// _uint32_sels(test, a, b) yields `a` when that bit is set and `b`
// otherwise -- confirm against the helper definitions.
uint16_t half_mul(uint16_t x, uint16_t y)
{
  // Field masks, shift amounts and special bit patterns for the 16-bit format.
  const uint32_t k_one = _uint32_li(0x00000001);
  const uint32_t k_sign_mask = _uint32_li(0x00008000);
  const uint32_t k_exp_mask = _uint32_li(0x00007c00);
  const uint32_t k_mant_mask = _uint32_li(0x000003ff);
  const uint32_t k_mant_hidden = _uint32_li(0x00000400);
  const uint32_t k_exp_pos = _uint32_li(0x0000000a);
  const uint32_t k_exp_bias = _uint32_li(0x0000000f);
  const uint32_t k_mant_bits = _uint32_li(0x0000000a);
  const uint32_t k_mant_half_bits = _uint32_li(0x00000005);
  const uint32_t k_nan_min = _uint32_li(0x00007c01);
  const uint32_t k_exp_mask_less_one = _uint32_li(0x00007bff);
  const uint32_t k_snan = _uint32_li(0x0000fe00);
  // Bit 5 of a shifted-down exponent; tested below to flag exponent overflow.
  const uint32_t k_exp_overflow_bit = _uint32_li(0x00000020);
  // Bit 20 of the mantissa product after it has been shifted right by one.
  const uint32_t k_wide_hidden_bit = _uint32_li(0x00100000);

  // Sign bit of each input; the result sign is their XOR.
  const uint32_t a_s = _uint32_and(x, k_sign_mask);
  const uint32_t b_s = _uint32_and(y, k_sign_mask);
  const uint32_t c_s = _uint32_xor(a_s, b_s);

  // Operand ordering keyed on x's exponent field. Presumably (a, b) = (y, x)
  // when that field is zero and (x, y) otherwise, so that a zero-exponent
  // input ends up in b -- depends on the _uint32_dec/_uint32_sels semantics.
  const uint32_t x_e = _uint32_and(x, k_exp_mask);
  const uint32_t x_e_eqz_msb = _uint32_dec(x_e);
  const uint32_t a = _uint32_sels(x_e_eqz_msb, y, x);
  const uint32_t b = _uint32_sels(x_e_eqz_msb, x, y);

  // Exponent and mantissa fields of the ordered operands; the *_amount
  // values are the exponent fields shifted down to bit 0.
  const uint32_t a_e = _uint32_and(a, k_exp_mask);
  const uint32_t b_e = _uint32_and(b, k_exp_mask);
  const uint32_t a_m = _uint32_and(a, k_mant_mask);
  const uint32_t b_m = _uint32_and(b, k_mant_mask);
  const uint32_t a_e_amount = _uint32_srl(a_e, k_exp_pos);
  const uint32_t b_e_amount = _uint32_srl(b_e, k_exp_pos);

  // Mantissas with bit 10 (the hidden bit) set.
  const uint32_t a_m_with_hidden = _uint32_or(a_m, k_mant_hidden);
  const uint32_t b_m_with_hidden = _uint32_or(b_m, k_mant_hidden);

  // Two mantissa products: one with both hidden bits, and one where b
  // contributes its stored mantissa only (feeds the denormal candidate).
  const uint32_t c_m_normal = _uint32_mul(a_m_with_hidden, b_m_with_hidden);
  const uint32_t c_m_denorm_biased = _uint32_mul(a_m_with_hidden, b_m);

  // Denormal candidate: the low 10 bits of the product are added back onto
  // it as the rounding step, then it is shifted right by 10 and again by
  // (bias - a's exponent), and masked to the mantissa field.
  const uint32_t c_e_denorm_unbias_e = _uint32_sub(k_exp_bias, a_e_amount);
  const uint32_t c_m_denorm_round_amount = _uint32_and(c_m_denorm_biased, k_mant_mask);
  const uint32_t c_m_denorm_rounded = _uint32_add(c_m_denorm_biased, c_m_denorm_round_amount);
  const uint32_t c_m_denorm_inplace = _uint32_srl(c_m_denorm_rounded, k_mant_bits);
  const uint32_t c_m_denorm_unbiased = _uint32_srl(c_m_denorm_inplace, c_e_denorm_unbias_e);
  const uint32_t c_m_denorm = _uint32_and(c_m_denorm_unbiased, k_mant_mask);

  // Result exponent: sum of both exponents minus one bias.
  const uint32_t c_e_amount_biased = _uint32_add(a_e_amount, b_e_amount);
  const uint32_t c_e_amount_unbiased = _uint32_sub(c_e_amount_biased, k_exp_bias);

  // Exponent underflow handling. is_c_e_unbiased_underflow is presumably an
  // all-ones mask when the unbiased exponent is negative (via _uint32_ext).
  // In that case the product is shifted right by twice the negated exponent
  // and the exponent itself is cleared.
  const uint32_t is_c_e_unbiased_underflow = _uint32_ext(c_e_amount_unbiased);
  const uint32_t c_e_underflow_half_sa = _uint32_neg(c_e_amount_unbiased);
  const uint32_t c_e_underflow_sa = _uint32_sll(c_e_underflow_half_sa, k_one);
  const uint32_t c_m_underflow = _uint32_srl(c_m_normal, c_e_underflow_sa);
  const uint32_t c_e_underflow_added = _uint32_andc(c_e_amount_unbiased, is_c_e_unbiased_underflow);
  const uint32_t c_m_underflow_added = _uint32_selb(is_c_e_unbiased_underflow, c_m_underflow, c_m_normal);

  // Overflow flag taken from bit 5 of the exponent computed so far.
  const uint32_t is_mul_overflow_test = _uint32_and(c_e_underflow_added, k_exp_overflow_bit);
  const uint32_t is_mul_overflow_msb = _uint32_neg(is_mul_overflow_test);

  // Radix correction: exponent incremented, mantissa product shifted right
  // by one; then bit 20 of the shifted product is examined.
  const uint32_t c_e_norm_radix_corrected = _uint32_inc(c_e_underflow_added);
  const uint32_t c_m_norm_radix_corrected = _uint32_srl(c_m_underflow_added, k_one);
  const uint32_t c_m_norm_hidden_bit = _uint32_and(c_m_norm_radix_corrected, k_wide_hidden_bit);
  const uint32_t is_c_m_norm_no_hidden_msb = _uint32_dec(c_m_norm_hidden_bit);

  // Renormalisation candidates for when bit 20 is clear: the shift amount
  // comes from a leading-zero count of (product >> 5). If that count
  // exceeds what the exponent can absorb, a reduced shift (count >> 1) is
  // used and the exponent is cleared.
  const uint32_t c_m_norm_lo = _uint32_srl(c_m_norm_radix_corrected, k_mant_half_bits);
  const uint32_t c_m_norm_lo_nlz = _uint16_cntlz(c_m_norm_lo);
  const uint32_t is_c_m_hidden_nunderflow_msb = _uint32_sub(c_m_norm_lo_nlz, c_e_norm_radix_corrected);
  const uint32_t is_c_m_hidden_underflow_msb = _uint32_not(is_c_m_hidden_nunderflow_msb);
  const uint32_t is_c_m_hidden_underflow = _uint32_ext(is_c_m_hidden_underflow_msb);
  const uint32_t c_m_hidden_underflow_normalized_sa = _uint32_srl(c_m_norm_lo_nlz, k_one);
  const uint32_t c_m_hidden_underflow_normalized = _uint32_sll(c_m_norm_radix_corrected, c_m_hidden_underflow_normalized_sa);
  const uint32_t c_m_hidden_normalized = _uint32_sll(c_m_norm_radix_corrected, c_m_norm_lo_nlz);
  const uint32_t c_e_hidden_normalized = _uint32_sub(c_e_norm_radix_corrected, c_m_norm_lo_nlz);
  const uint32_t c_e_hidden = _uint32_andc(c_e_hidden_normalized, is_c_m_hidden_underflow);
  const uint32_t c_m_hidden = _uint32_sels(is_c_m_hidden_underflow_msb, c_m_hidden_underflow_normalized, c_m_hidden_normalized);

  // Pick the renormalised pair or the radix-corrected pair.
  const uint32_t c_m_normalized = _uint32_sels(is_c_m_norm_no_hidden_msb, c_m_hidden, c_m_norm_radix_corrected);
  const uint32_t c_e_normalized = _uint32_sels(is_c_m_norm_no_hidden_msb, c_e_hidden, c_e_norm_radix_corrected);

  // Rounding step for the normal path (low 10 bits added back), plus a
  // second overflow flag from bit 5 of the normalised exponent.
  const uint32_t c_m_norm_round_amount = _uint32_and(c_m_normalized, k_mant_mask);
  const uint32_t c_m_norm_rounded = _uint32_add(c_m_normalized, c_m_norm_round_amount);
  const uint32_t is_round_overflow_test = _uint32_and(c_e_normalized, k_exp_overflow_bit);
  const uint32_t is_round_overflow_msb = _uint32_neg(is_round_overflow_test);

  // Move mantissa and exponent into their 16-bit field positions.
  const uint32_t c_m_norm_inplace = _uint32_srl(c_m_norm_rounded, k_mant_bits);
  const uint32_t c_m = _uint32_and(c_m_norm_inplace, k_mant_mask);
  const uint32_t c_e_norm_inplace = _uint32_sll(c_e_normalized, k_exp_pos);
  const uint32_t c_e = _uint32_and(c_e_norm_inplace, k_exp_mask);

  // Candidate bit patterns for the special outcomes and the normal outcome.
  const uint32_t c_em_nan = _uint32_or(k_exp_mask, a_m);
  const uint32_t c_nan = _uint32_or(a_s, c_em_nan);
  const uint32_t c_denorm = _uint32_or(c_s, c_m_denorm);
  const uint32_t c_inf = _uint32_or(c_s, k_exp_mask);
  const uint32_t c_em_norm = _uint32_or(c_e, c_m);

  // Operand classification. "flagged" subtracts the exponent field from
  // 0x7bff, which only goes negative when the field is all ones (0x7c00);
  // "eqz" decrements a field, presumably setting the top bit only for zero.
  const uint32_t is_a_e_flagged_msb = _uint32_sub(k_exp_mask_less_one, a_e);
  const uint32_t is_b_e_flagged_msb = _uint32_sub(k_exp_mask_less_one, b_e);
  const uint32_t is_a_e_eqz_msb = _uint32_dec(a_e);
  const uint32_t is_a_m_eqz_msb = _uint32_dec(a_m);
  const uint32_t is_b_e_eqz_msb = _uint32_dec(b_e);
  const uint32_t is_b_m_eqz_msb = _uint32_dec(b_m);
  const uint32_t is_b_eqz_msb = _uint32_and(is_b_e_eqz_msb, is_b_m_eqz_msb);
  const uint32_t is_a_eqz_msb = _uint32_and(is_a_e_eqz_msb, is_a_m_eqz_msb);

  // Outcome predicates derived from the classification flags.
  const uint32_t is_c_nan_via_a_msb = _uint32_andc(is_a_e_flagged_msb, is_b_e_flagged_msb);
  const uint32_t is_c_nan_via_b_msb = _uint32_andc(is_b_e_flagged_msb, is_b_m_eqz_msb);
  const uint32_t is_c_nan_msb = _uint32_or(is_c_nan_via_a_msb, is_c_nan_via_b_msb);
  const uint32_t is_c_denorm_msb = _uint32_andc(is_b_e_eqz_msb, is_a_e_flagged_msb);
  const uint32_t is_a_inf_msb = _uint32_and(is_a_e_flagged_msb, is_a_m_eqz_msb);
  const uint32_t is_c_snan_msb = _uint32_and(is_a_inf_msb, is_b_eqz_msb);
  const uint32_t is_c_nan_min_via_a_msb = _uint32_and(is_a_e_flagged_msb, is_b_eqz_msb);
  const uint32_t is_c_nan_min_via_b_msb = _uint32_and(is_b_e_flagged_msb, is_a_eqz_msb);
  const uint32_t is_c_nan_min_msb = _uint32_or(is_c_nan_min_via_a_msb, is_c_nan_min_via_b_msb);
  const uint32_t is_c_inf_msb = _uint32_or(is_a_e_flagged_msb, is_b_e_flagged_msb);
  const uint32_t is_overflow_msb = _uint32_or(is_round_overflow_msb, is_mul_overflow_msb);

  // Selection chain. Each step takes the previous step's value as its
  // fallback, so a later select wins over an earlier one when both
  // predicates are set.
  const uint32_t c_em_overflow_result = _uint32_sels(is_overflow_msb, k_exp_mask, c_em_norm);
  const uint32_t c_common_result = _uint32_or(c_s, c_em_overflow_result);
  const uint32_t c_zero_result = _uint32_sels(is_b_eqz_msb, c_s, c_common_result);
  const uint32_t c_nan_result = _uint32_sels(is_c_nan_msb, c_nan, c_zero_result);
  const uint32_t c_nan_min_result = _uint32_sels(is_c_nan_min_msb, k_nan_min, c_nan_result);
  const uint32_t c_inf_result = _uint32_sels(is_c_inf_msb, c_inf, c_nan_min_result);
  const uint32_t c_denorm_result = _uint32_sels(is_c_denorm_msb, c_denorm, c_inf_result);
  const uint32_t c_result = _uint32_sels(is_c_snan_msb, k_snan, c_denorm_result);

  // Only the low 16 bits are returned.
  return (uint16_t)c_result;
}
|