libbin 0.9.0 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/ext/libbin/half.h ADDED
@@ -0,0 +1,18 @@
1
+ #ifndef HALF_H
2
+ #define HALF_H
3
+
4
+ #include <stdint.h>
5
+
6
+ uint32_t half_to_float( uint16_t h );
7
+ uint16_t half_from_float( uint32_t f );
8
+ uint16_t half_add( uint16_t arg0, uint16_t arg1 );
9
+ uint16_t half_mul( uint16_t arg0, uint16_t arg1 );
10
+
11
// Half-precision subtraction, expressed through half_add.
//
// ha and hb are 16-bit half-precision bit patterns. Toggling bit 15 (the
// sign bit) of hb negates it, so ha - hb is computed as ha + (-hb).
// Returns whatever half_add returns for those two operands.
static inline uint16_t
half_sub( uint16_t ha, uint16_t hb )
{
  const uint16_t negated_hb = hb ^ 0x8000;

  return half_add( ha, negated_hb );
}
17
+
18
+ #endif /* HALF_H */
@@ -0,0 +1,65 @@
1
+ #include "ruby.h"
2
+ #include "./half.h"
3
+ #include "./pghalf.h"
4
+
5
/*
 * Overlay of a C float and a 32-bit unsigned integer.
 *
 * The conversion routines in this file exchange single-precision values as
 * raw uint32_t bit patterns; writing one member and reading the other moves
 * a value between the two views without a pointer cast.
 */
union float_u {
  float    f; /* value seen as a C float         */
  uint32_t i; /* the same storage, seen as bits  */
};
9
+
10
+ static VALUE pghalf_from_string_p(VALUE self, VALUE str, VALUE pack_str) {
11
+ Check_Type(str, T_STRING);
12
+ Check_Type(pack_str, T_STRING);
13
+ VALUE arr = rb_funcall(str, rb_intern("unpack"), 1, pack_str);
14
+ uint16_t val = NUM2USHORT(rb_funcall(arr, rb_intern("first"), 0));
15
+ union float_u res;
16
+
17
+ res.i = pghalf_to_float(val);
18
+ return DBL2NUM(res.f);
19
+ }
20
+
21
+ static VALUE half_from_string_p(VALUE self, VALUE str, VALUE pack_str) {
22
+ Check_Type(str, T_STRING);
23
+ Check_Type(pack_str, T_STRING);
24
+ VALUE arr = rb_funcall(str, rb_intern("unpack"), 1, pack_str);
25
+ uint16_t val = NUM2USHORT(rb_funcall(arr, rb_intern("first"), 0));
26
+ union float_u res;
27
+
28
+ res.i = half_to_float(val);
29
+ return DBL2NUM(res.f);
30
+ }
31
+
32
+ static VALUE pghalf_to_string_p(VALUE self, VALUE number, VALUE pack_str) {
33
+ Check_Type(number, T_FLOAT);
34
+ union float_u val;
35
+ uint16_t res;
36
+
37
+ val.f = NUM2DBL(number);
38
+ res = pghalf_from_float(val.i);
39
+ VALUE arr = rb_ary_new3(1, UINT2NUM(res) );
40
+
41
+ return rb_funcall(arr, rb_intern("pack"), 1, pack_str);
42
+ }
43
+
44
+ static VALUE half_to_string_p(VALUE self, VALUE number, VALUE pack_str) {
45
+ Check_Type(number, T_FLOAT);
46
+ union float_u val;
47
+ uint16_t res;
48
+
49
+ val.f = NUM2DBL(number);
50
+ res = half_from_float(val.i);
51
+ VALUE arr = rb_ary_new3(1, UINT2NUM(res) );
52
+
53
+ return rb_funcall(arr, rb_intern("pack"), 1, pack_str);
54
+ }
55
+
56
+ void Init_libbin_c() {
57
+ ID id;
58
+ VALUE mod;
59
+ id = rb_intern("LibBin");
60
+ mod = rb_const_get(rb_cObject, id);
61
+ rb_define_module_function(mod, "half_from_string", half_from_string_p, 2);
62
+ rb_define_module_function(mod, "half_to_string", half_to_string_p, 2);
63
+ rb_define_module_function(mod, "pghalf_from_string", pghalf_from_string_p, 2);
64
+ rb_define_module_function(mod, "pghalf_to_string", pghalf_to_string_p, 2);
65
+ }
@@ -0,0 +1,449 @@
1
+ // Branch-free implementation of half-precision (16 bit) floating point
2
+ // Copyright 2006 Mike Acton <macton@gmail.com>
3
+ // Copyright 2019 Brice Videau <brice.videau@gmail.com>
4
+ //
5
+ // Permission is hereby granted, free of charge, to any person obtaining a
6
+ // copy of this software and associated documentation files (the "Software"),
7
+ // to deal in the Software without restriction, including without limitation
8
+ // the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ // and/or sell copies of the Software, and to permit persons to whom the
10
+ // Software is furnished to do so, subject to the following conditions:
11
+ //
12
+ // The above copyright notice and this permission notice shall be included
13
+ // in all copies or substantial portions of the Software.
14
+ //
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE
22
+ //
23
+ // PlatinumGames Half-precision floating point format
24
+ // ------------------------------------
25
+ //
26
+ // | Field | Last | First | Note
27
+ // |----------|------|-------|----------
28
+ // | Sign | 15 | 15 |
29
+ // | Exponent | 14 | 9 | Bias = 47
30
+ // | Mantissa | 8 | 0 |
31
+ //
32
+ // Compiling
33
+ // ---------
34
+ //
35
+ // Preferred compile flags for GCC:
36
+ // -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
37
+ //
38
+ // This file is a C99 source file, intended to be compiled with a C99
39
+ // compliant compiler. However, for the moment it remains compatible
40
+ // with C++98. Therefore if you are using a compiler that poorly implements
41
+ // C standards (e.g. MSVC), it may be compiled as C++. This is not
42
+ // guaranteed for future versions.
43
+ //
44
+
45
+ #include "pghalf.h"
46
+
47
// 32-bit primitives
//
// Each integer operation used by the converters gets its own named helper.
// All of them operate on unsigned 32-bit values, so arithmetic wraps modulo
// 2^32. The "_msb" selectors built on top of them read bit 31 as a flag.

// Load immediate
static inline uint32_t _uint32_li( uint32_t a )
{
  return (a);
}

// Decrement (0 wraps to 0xffffffff, which sets bit 31)
static inline uint32_t _uint32_dec( uint32_t a )
{
  return (a - 1);
}

// Increment
static inline uint32_t _uint32_inc( uint32_t a )
{
  return (a + 1);
}

// Complement
static inline uint32_t _uint32_not( uint32_t a )
{
  return (~a);
}

// Negate (two's complement, modulo 2^32)
static inline uint32_t _uint32_neg( uint32_t a )
{
  return (-a);
}

// Extend sign: all ones when bit 31 of a is set, zero otherwise.
// Built from unsigned operations only, so the result does not depend on how
// the compiler converts to, or right-shifts, a negative signed value.
static inline uint32_t _uint32_ext( uint32_t a )
{
  return ((uint32_t)0 - (a >> 31));
}

// And
static inline uint32_t _uint32_and( uint32_t a, uint32_t b )
{
  return (a & b);
}

// Exclusive Or
static inline uint32_t _uint32_xor( uint32_t a, uint32_t b )
{
  return (a ^ b);
}

// And with Complement
static inline uint32_t _uint32_andc( uint32_t a, uint32_t b )
{
  return (a & ~b);
}

// Or
static inline uint32_t _uint32_or( uint32_t a, uint32_t b )
{
  return (a | b);
}

// Shift Right Logical
// A count outside 0..31 (negative counts included) shifts every bit out and
// yields 0. Such counts never reach the C shift operator, where they are
// undefined; callers compute counts arithmetically and may exceed 31.
static inline uint32_t _uint32_srl( uint32_t a, int sa )
{
  return (((unsigned int)sa < 32u) ? (a >> sa) : 0);
}

// Shift Left Logical
// Same rule as _uint32_srl: a count outside 0..31 yields 0.
static inline uint32_t _uint32_sll( uint32_t a, int sa )
{
  return (((unsigned int)sa < 32u) ? (a << sa) : 0);
}

// Add
static inline uint32_t _uint32_add( uint32_t a, uint32_t b )
{
  return (a + b);
}

// Subtract (a < b wraps, which sets bit 31 for small differences)
static inline uint32_t _uint32_sub( uint32_t a, uint32_t b )
{
  return (a - b);
}

// Multiply
static inline uint32_t _uint32_mul( uint32_t a, uint32_t b )
{
  return (a * b);
}

// Select on Sign bit: returns a when bit 31 of test is set, b otherwise.
static inline uint32_t _uint32_sels( uint32_t test, uint32_t a, uint32_t b )
{
  const uint32_t mask = _uint32_ext( test );

  return (_uint32_or( _uint32_and( a, mask ), _uint32_andc( b, mask ) ));
}

// Select Bits on mask: takes the bits of a where mask is 1 and the bits of
// b where mask is 0.
static inline uint32_t _uint32_selb( uint32_t mask, uint32_t a, uint32_t b )
{
  return (_uint32_or( _uint32_and( a, mask ), _uint32_andc( b, mask ) ));
}
157
+
158
// 16-bit primitives
//
// Same idea as the 32-bit helpers, for unsigned 16-bit values. C promotes
// uint16_t operands to int before operating on them, so every result is
// narrowed back explicitly; arithmetic therefore wraps modulo 2^16.

// Load Immediate
static inline uint16_t _uint16_li( uint16_t a )
{
  return (a);
}

// Extend sign: all ones when bit 15 of a is set, zero otherwise.
// Uses unsigned arithmetic only, so it does not depend on how the compiler
// converts to, or right-shifts, a negative signed value.
static inline uint16_t _uint16_ext( uint16_t a )
{
  return ((uint16_t)(0u - ((unsigned int)a >> 15)));
}

// Negate (two's complement, modulo 2^16)
static inline uint16_t _uint16_neg( uint16_t a )
{
  return ((uint16_t)(0u - a));
}

// Complement
static inline uint16_t _uint16_not( uint16_t a )
{
  return ((uint16_t)~a);
}

// Decrement (0 wraps to 0xffff, which sets bit 15)
static inline uint16_t _uint16_dec( uint16_t a )
{
  return ((uint16_t)(a - 1));
}

// Shift Left Logical
// A count outside 0..15 yields 0. The shift is done on a 32-bit unsigned
// value so that no bit is ever shifted into the sign of a promoted int.
static inline uint16_t _uint16_sll( uint16_t a, int sa )
{
  return (((unsigned int)sa < 16u) ? (uint16_t)((uint32_t)a << sa) : 0);
}

// Shift Right Logical
// A count outside 0..15 yields 0.
static inline uint16_t _uint16_srl( uint16_t a, int sa )
{
  return (((unsigned int)sa < 16u) ? (uint16_t)(a >> sa) : 0);
}

// Add
static inline uint16_t _uint16_add( uint16_t a, uint16_t b )
{
  return ((uint16_t)(a + b));
}

// Subtract
static inline uint16_t _uint16_sub( uint16_t a, uint16_t b )
{
  return ((uint16_t)(a - b));
}

// And
static inline uint16_t _uint16_and( uint16_t a, uint16_t b )
{
  return ((uint16_t)(a & b));
}

// Or
static inline uint16_t _uint16_or( uint16_t a, uint16_t b )
{
  return ((uint16_t)(a | b));
}

// Exclusive Or
static inline uint16_t _uint16_xor( uint16_t a, uint16_t b )
{
  return ((uint16_t)(a ^ b));
}

// And with Complement
static inline uint16_t _uint16_andc( uint16_t a, uint16_t b )
{
  return ((uint16_t)(a & ~b));
}

// And then Shift Right Logical (count outside 0..15 yields 0)
static inline uint16_t _uint16_andsrl( uint16_t a, uint16_t b, int sa )
{
  return (((unsigned int)sa < 16u) ? (uint16_t)((a & b) >> sa) : 0);
}

// Shift Right Logical then Mask (count outside 0..15 yields 0)
static inline uint16_t _uint16_srlm( uint16_t a, int sa, uint16_t mask )
{
  return (((unsigned int)sa < 16u) ? (uint16_t)((a >> sa) & mask) : 0);
}

// Add then Mask
static inline uint16_t _uint16_addm( uint16_t a, uint16_t b, uint16_t mask )
{
  return ((uint16_t)((a + b) & mask));
}


// Select on Sign bit: returns a when bit 15 of test is set, b otherwise.
static inline uint16_t _uint16_sels( uint16_t test, uint16_t a, uint16_t b )
{
  const uint16_t mask = _uint16_ext( test );

  return (_uint16_or( _uint16_and( a, mask ), _uint16_andc( b, mask ) ));
}
265
+
266
// Count Leading Zeros
//
// Returns the number of zero bits above the most significant set bit of x,
// or 32 when x is zero.
static inline uint32_t _uint32_cntlz( uint32_t x )
{
#ifdef __GNUC__
  /* __builtin_clz is undefined for x == 0, so zero is answered without   */
  /* calling it. Testing x directly, rather than the sign bit of -x, also */
  /* keeps the answer right for inputs above 0x80000000, whose negation   */
  /* has a clear sign bit.                                                */
  return ((x != 0) ? (uint32_t)__builtin_clz( x ) : 0x00000020);
#else
  /* Portable fallback: smear the highest set bit into every lower        */
  /* position, then count the one bits of the complement. Those are       */
  /* exactly the leading zeros of x.                                      */
  uint32_t smear = x;
  uint32_t zeros;

  smear |= smear >> 1;
  smear |= smear >> 2;
  smear |= smear >> 4;
  smear |= smear >> 8;
  smear |= smear >> 16;

  zeros = ~smear;
  zeros = zeros - ((zeros >> 1) & 0x55555555);
  zeros = (zeros & 0x33333333) + ((zeros >> 2) & 0x33333333);
  zeros = (zeros + (zeros >> 4)) & 0x0f0f0f0f;
  zeros = zeros + (zeros >> 8);
  zeros = zeros + (zeros >> 16);

  return (zeros & 0x0000003f);
#endif
}
307
+
308
// Count Leading Zeros (16 bit)
//
// Number of zero bits above the most significant set bit of x.
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  // Count on the value widened to 32 bits, then drop the 16 zeros that the
  // widening put in front of it.
  const uint16_t wide_count = (uint16_t)_uint32_cntlz( (uint32_t)x );

  return ((uint16_t)_uint32_sub( wide_count, 16 ));
#else
  // Smear the highest set bit into every lower position.
  const uint16_t smear1 = _uint16_or( x,      _uint16_srl( x,      1 ) );
  const uint16_t smear2 = _uint16_or( smear1, _uint16_srl( smear1, 2 ) );
  const uint16_t smear4 = _uint16_or( smear2, _uint16_srl( smear2, 4 ) );
  const uint16_t smear8 = _uint16_or( smear4, _uint16_srl( smear4, 8 ) );

  // The complement has a one for every leading zero of x; count those ones
  // by summing adjacent bit groups of doubling width.
  const uint16_t zeros   = _uint16_not( smear8 );
  const uint16_t pairs   = _uint16_sub( zeros, _uint16_srlm( zeros, 1, 0x5555 ) );
  const uint16_t low2    = _uint16_and( pairs, 0x3333 );
  const uint16_t high2   = _uint16_srlm( pairs, 2, 0x3333 );
  const uint16_t nibbles = _uint16_add( low2, high2 );
  const uint16_t bytes   = _uint16_addm( nibbles, _uint16_srl( nibbles, 4 ), 0x0f0f );
  const uint16_t total   = _uint16_addm( bytes, _uint16_srl( bytes, 8 ), 0x001f );

  return ( total );
#endif
}
337
+
338
// Convert a single-precision bit pattern to the 16-bit PlatinumGames format.
//
// f is read as: sign in bit 31, exponent in bits 30-23, mantissa in bits
// 22-0. The result is laid out as: sign in bit 15, exponent in bits 14-9,
// mantissa in bits 8-0, with the exponent rebased by 0x50.
//
// Every candidate result (normal, rounded-up, denormal, infinity, NaN) is
// computed unconditionally; the *_msb values carry a condition in bit 31
// and the chain of _uint32_sels calls at the end picks the one that applies.
// Later selects override earlier ones.
//
// The mantissa is rounded by adding twice the first discarded bit (bit 13),
// i.e. a half in the last kept place rounds the magnitude up.
//
// A rebased exponent of 0x3f or more is reported as infinity (0x7e00 plus
// sign). Exponent field 0x3f is the one pghalf_to_float decodes as
// infinity/NaN, so it must never be produced for a finite input. The
// overflow test therefore compares against 0x3e, the largest finite field.
uint16_t
pghalf_from_float( uint32_t f )
{
  const uint32_t one = _uint32_li( 0x00000001 );
  const uint32_t f_s_mask = _uint32_li( 0x80000000 ); //bit 31
  const uint32_t f_e_mask = _uint32_li( 0x7f800000 ); //bits 30-23
  const uint32_t f_m_mask = _uint32_li( 0x007fffff ); //bits 22-0
  const uint32_t f_m_hidden_bit = _uint32_li( 0x00800000 ); //1<<f_e_pos
  const uint32_t f_m_round_bit = _uint32_li( 0x00002000 ); //1<<(f_e_pos - h_e_pos - 1)
  const uint32_t f_snan_mask = _uint32_li( 0x7fc00000 ); //f_e_mask + 1 << (f_e_pos - 1)
  const uint32_t f_e_pos = _uint32_li( 0x00000017 ); //23
  const uint32_t h_e_pos = _uint32_li( 0x00000009 ); //9
  const uint32_t h_e_mask = _uint32_li( 0x00007e00 ); //bits 14-9
  const uint32_t h_snan_mask = _uint32_li( 0x00007f00 ); //h_e_mask + 1 << (h_e_pos - 1)
  const uint32_t h_e_max_finite = _uint32_li( 0x0000003e ); //(h_e_mask >> 9) - 1
  const uint32_t f_h_s_pos_offset = _uint32_li( 0x00000010 ); //f_s_pos - h_s_pos
  const uint32_t f_h_bias_offset = _uint32_li( 0x00000050 ); //f_bias - h_bias
  const uint32_t f_h_m_pos_offset = _uint32_li( 0x0000000e ); //f_e_pos - h_e_pos
  const uint32_t h_nan_min = _uint32_li( 0x00007e01 ); //h_e_mask + 1
  const uint32_t f_h_e_biased_flag = _uint32_li( 0x000000af ); //f_bias + h_bias + 1

  // Split the input into fields and rebase the exponent.
  const uint32_t f_s = _uint32_and( f, f_s_mask );
  const uint32_t f_e = _uint32_and( f, f_e_mask );
  const uint16_t h_s = _uint32_srl( f_s, f_h_s_pos_offset );
  const uint32_t f_m = _uint32_and( f, f_m_mask );
  const uint16_t f_e_amount = _uint32_srl( f_e, f_e_pos );
  const uint32_t f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset );
  const uint32_t f_snan = _uint32_and( f, f_snan_mask );

  // Rounded mantissa, and its denormal form (hidden bit made explicit, then
  // shifted right by how far the exponent lies below the normal range).
  const uint32_t f_m_round_mask = _uint32_and( f_m, f_m_round_bit );
  const uint32_t f_m_round_offset = _uint32_sll( f_m_round_mask, one );
  const uint32_t f_m_rounded = _uint32_add( f_m, f_m_round_offset );
  // NOTE(review): this count is 1..81 when the denormal result is selected
  // and wraps to a huge value otherwise; the denormal result is only
  // meaningful if _uint32_srl yields 0 for counts above 31.
  const uint32_t f_m_denorm_sa = _uint32_sub( one, f_e_half_bias );
  const uint32_t f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit );
  const uint32_t f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
  const uint32_t h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset );
  const uint32_t f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit );

  // Candidate exponent/mantissa encodings.
  const uint32_t m_nan = _uint32_srl( f_m, f_h_m_pos_offset );
  const uint32_t h_em_nan = _uint32_or( h_e_mask, m_nan );
  const uint32_t h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias );
  const uint32_t h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos );
  const uint32_t h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos );
  const uint32_t h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset );
  const uint32_t h_em_norm = _uint32_or( h_e_norm, h_m_norm );

  // Conditions, each carried in bit 31.
  const uint32_t is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount );
  // NOTE(review): f_e_half_bias never exceeds 0xaf (exponent 255 - 0x50), so
  // this difference never appears to go negative; if so, the NaN-payload and
  // NaN-underflow selections below never fire and NaNs without the 0x7fc00000
  // pattern come out as infinity via the overflow test. Confirm intent.
  const uint32_t is_f_e_flagged_msb = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
  const uint32_t is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb );
  const uint32_t is_f_m_eqz_msb = _uint32_dec( f_m );
  const uint32_t is_h_nan_eqz_msb = _uint32_dec( m_nan );
  const uint32_t is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb );
  const uint32_t is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
  // Negative (bit 31 set) as soon as the rebased exponent reaches 0x3f.
  const uint32_t is_e_overflow_msb = _uint32_sub( h_e_max_finite, f_e_half_bias );
  const uint32_t is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb );
  const uint32_t is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask );
  const uint32_t is_m_norm_overflow_msb = _uint32_neg( f_m_rounded_overflow );
  const uint32_t is_f_snan_msb = _uint32_not( is_f_nsnan_msb );

  // Selection chain: each step keeps the previous result unless its
  // condition holds.
  const uint32_t h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm );
  const uint32_t h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result );
  const uint32_t h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result );
  const uint32_t h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result );
  const uint32_t h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result );
  const uint32_t h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result );
  const uint32_t h_result = _uint32_or( h_s, h_em_snan_result );

  return (uint16_t)(h_result);
}
402
+
403
// Convert a 16-bit PlatinumGames half bit pattern to a single-precision
// bit pattern.
//
// h is read as: sign in bit 15, exponent in bits 14-9, mantissa in bits 8-0.
// The result is laid out as: sign in bit 31, exponent in bits 30-23,
// mantissa in bits 22-0.
//
// All candidate results are computed unconditionally; the *_msb values carry
// a condition in bit 31 and the _uint32_sels chain at the end picks one:
//   - exponent and mantissa both zero      -> zero (sign kept)
//   - exponent zero, mantissa non-zero     -> denormal, renormalised using
//                                             the mantissa's leading-zero count
//   - exponent all ones, mantissa zero     -> infinity (f_e_mask)
//   - exponent all ones, mantissa non-zero -> NaN (f_e_mask | shifted mantissa)
//   - anything else                        -> exponent rebased by 0x50,
//                                             mantissa shifted left by 14
uint32_t
pghalf_to_float( uint16_t h )
{
  const uint32_t h_e_mask = _uint32_li( 0x00007e00 ); //bits 14-9
  const uint32_t h_m_mask = _uint32_li( 0x000001ff ); //bits 8-0
  const uint32_t h_s_mask = _uint32_li( 0x00008000 ); //bit 15
  const uint32_t h_f_s_pos_offset = _uint32_li( 0x00000010 ); //f_s_pos - h_s_pos
  const uint32_t h_f_e_pos_offset = _uint32_li( 0x0000000e ); //f_m_bitcount - h_m_bitcount
  const uint32_t h_f_bias_offset = _uint32_li( 0x0000a000 ); //(f_bias - h_bias) << 9
  const uint32_t f_e_mask = _uint32_li( 0x7f800000 ); //bits 30-23
  const uint32_t f_m_mask = _uint32_li( 0x007fffff ); //bits 22-0
  const uint32_t h_f_e_denorm_bias = _uint32_li( 0x0000005f ); //h_f_e_pos_offset + 1 + (f_bias - h_bias)
  const uint32_t h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 ); //float exp bit count
  const uint32_t f_e_pos = _uint32_li( 0x00000017 ); //23
  const uint32_t h_e_mask_minus_one = _uint32_li( 0x00007dff ); //h_e_mask + h_m_mask - 1<<h_e_pos
  // Split the input into fields (each left in its original bit position).
  const uint32_t h_e = _uint32_and( h, h_e_mask );
  const uint32_t h_m = _uint32_and( h, h_m_mask );
  const uint32_t h_s = _uint32_and( h, h_s_mask );
  // Normal-number candidate: rebase the exponent in place, then move the
  // exponent and mantissa fields up to their float positions.
  const uint32_t h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
  const uint32_t h_m_nlz = _uint32_cntlz( h_m );
  const uint32_t f_s = _uint32_sll( h_s, h_f_s_pos_offset );
  const uint32_t f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
  const uint32_t f_m = _uint32_sll( h_m, h_f_e_pos_offset );
  const uint32_t f_em = _uint32_or( f_e, f_m );
  // Denormal candidate: shift the mantissa so its top set bit lands on bit
  // 23, mask that bit off, and lower the exponent by the same shift.
  const uint32_t h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
  const uint32_t f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
  const uint32_t h_f_m = _uint32_sll( h_m, h_f_m_sa );
  const uint32_t f_m_denorm = _uint32_and( h_f_m, f_m_mask );
  const uint32_t f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
  const uint32_t f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
  const uint32_t f_em_nan = _uint32_or( f_e_mask, f_m );
  // Conditions, each carried in bit 31: decrementing 0 wraps to all ones,
  // negating a non-zero 9-bit mantissa sets the high bits, and the subtract
  // goes negative only when h_e equals h_e_mask.
  const uint32_t is_e_eqz_msb = _uint32_dec( h_e );
  const uint32_t is_m_nez_msb = _uint32_neg( h_m );
  const uint32_t is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
  const uint32_t is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
  const uint32_t is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb );
  const uint32_t is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
  const uint32_t is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb );
  // Selection chain: start from the normal candidate (cleared when the input
  // is zero) and let each later condition override the previous result.
  const uint32_t is_zero = _uint32_ext( is_zero_msb );
  const uint32_t f_zero_result = _uint32_andc( f_em, is_zero );
  const uint32_t f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
  const uint32_t f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result );
  const uint32_t f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result );
  const uint32_t f_result = _uint32_or( f_s, f_nan_result );

  return (f_result);
}