libbin 0.9.0 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +0 -0
- data/ext/libbin/extconf.rb +3 -0
- data/ext/libbin/half.c +719 -0
- data/ext/libbin/half.h +18 -0
- data/ext/libbin/libbin_c.c +65 -0
- data/ext/libbin/pghalf.c +449 -0
- data/ext/libbin/pghalf.h +9 -0
- data/lib/libbin.rb +51 -54
- data/lib/libbin/alignment.rb +0 -0
- data/lib/libbin/data_types.rb +63 -35
- data/libbin.gemspec +3 -2
- metadata +10 -3
data/ext/libbin/half.h
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#ifndef HALF_H
|
2
|
+
#define HALF_H
|
3
|
+
|
4
|
+
#include <stdint.h>
|
5
|
+
|
6
|
+
uint32_t half_to_float( uint16_t h );
|
7
|
+
uint16_t half_from_float( uint32_t f );
|
8
|
+
uint16_t half_add( uint16_t arg0, uint16_t arg1 );
|
9
|
+
uint16_t half_mul( uint16_t arg0, uint16_t arg1 );
|
10
|
+
|
11
|
+
/*
 * Subtract two half-precision values given as raw 16-bit patterns.
 *
 * ha: minuend bits.
 * hb: subtrahend bits.
 * Returns whatever half_add() yields for ha and hb with its top bit flipped.
 */
static inline uint16_t
half_sub( uint16_t ha, uint16_t hb )
{
  /* a - b == a + (-b): bit 15 is the sign bit, so toggling it negates hb. */
  const uint16_t negated_hb = (uint16_t)( hb ^ 0x8000 );

  return half_add( ha, negated_hb );
}
|
17
|
+
|
18
|
+
#endif /* HALF_H */
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "./half.h"
|
3
|
+
#include "./pghalf.h"
|
4
|
+
|
5
|
+
/*
 * Overlay used to move between a float value and its raw 32-bit pattern:
 * write one member, read the other.
 */
union float_u {
  float    f; /* the storage viewed as a single-precision value */
  uint32_t i; /* the same storage viewed as an unsigned 32-bit integer */
};
|
9
|
+
|
10
|
+
/*
 * LibBin.pghalf_from_string(str, pack_str)
 *
 * Unpacks `str` with the `pack_str` directive, takes the first unpacked
 * element as a 16-bit unsigned value, converts it with pghalf_to_float()
 * and returns the result as a Ruby Float.
 *
 * Both arguments must be Ruby Strings (enforced with Check_Type).
 * NOTE(review): pack_str is presumably a 16-bit directive such as "S<" or
 * "S>" chosen by the caller for endianness -- confirm against the Ruby side.
 */
static VALUE pghalf_from_string_p(VALUE self, VALUE str, VALUE pack_str) {
  union float_u bits;
  VALUE unpacked;
  VALUE head;
  uint16_t raw;

  Check_Type(str, T_STRING);
  Check_Type(pack_str, T_STRING);

  unpacked = rb_funcall(str, rb_intern("unpack"), 1, pack_str);
  head = rb_funcall(unpacked, rb_intern("first"), 0);
  raw = NUM2USHORT(head);

  /* The converter works on bit patterns; the union turns them into a float. */
  bits.i = pghalf_to_float(raw);
  return DBL2NUM(bits.f);
}
|
20
|
+
|
21
|
+
/*
 * LibBin.half_from_string(str, pack_str)
 *
 * Unpacks `str` with the `pack_str` directive, takes the first unpacked
 * element as a 16-bit unsigned value, converts it with half_to_float()
 * and returns the result as a Ruby Float.
 *
 * Both arguments must be Ruby Strings (enforced with Check_Type).
 * NOTE(review): pack_str is presumably a 16-bit directive such as "S<" or
 * "S>" chosen by the caller for endianness -- confirm against the Ruby side.
 */
static VALUE half_from_string_p(VALUE self, VALUE str, VALUE pack_str) {
  union float_u bits;
  VALUE unpacked;
  VALUE head;
  uint16_t raw;

  Check_Type(str, T_STRING);
  Check_Type(pack_str, T_STRING);

  unpacked = rb_funcall(str, rb_intern("unpack"), 1, pack_str);
  head = rb_funcall(unpacked, rb_intern("first"), 0);
  raw = NUM2USHORT(head);

  /* The converter works on bit patterns; the union turns them into a float. */
  bits.i = half_to_float(raw);
  return DBL2NUM(bits.f);
}
|
31
|
+
|
32
|
+
/*
 * LibBin.pghalf_to_string(number, pack_str)
 *
 * Narrows the Ruby Float `number` to a C float, converts its bit pattern
 * with pghalf_from_float(), and returns the one-element array holding the
 * 16-bit result packed with the `pack_str` directive.
 *
 * `number` must be a Ruby Float (enforced with Check_Type); `pack_str` is
 * passed to Array#pack unchanged.
 */
static VALUE pghalf_to_string_p(VALUE self, VALUE number, VALUE pack_str) {
  union float_u bits;
  uint16_t half_bits;
  VALUE wrapped;

  Check_Type(number, T_FLOAT);

  /* Ruby Floats are doubles; the converter expects single-precision bits. */
  bits.f = (float)NUM2DBL(number);
  half_bits = pghalf_from_float(bits.i);

  wrapped = rb_ary_new3(1, UINT2NUM(half_bits));
  return rb_funcall(wrapped, rb_intern("pack"), 1, pack_str);
}
|
43
|
+
|
44
|
+
/*
 * LibBin.half_to_string(number, pack_str)
 *
 * Narrows the Ruby Float `number` to a C float, converts its bit pattern
 * with half_from_float(), and returns the one-element array holding the
 * 16-bit result packed with the `pack_str` directive.
 *
 * `number` must be a Ruby Float (enforced with Check_Type); `pack_str` is
 * passed to Array#pack unchanged.
 */
static VALUE half_to_string_p(VALUE self, VALUE number, VALUE pack_str) {
  union float_u bits;
  uint16_t half_bits;
  VALUE wrapped;

  Check_Type(number, T_FLOAT);

  /* Ruby Floats are doubles; the converter expects single-precision bits. */
  bits.f = (float)NUM2DBL(number);
  half_bits = half_from_float(bits.i);

  wrapped = rb_ary_new3(1, UINT2NUM(half_bits));
  return rb_funcall(wrapped, rb_intern("pack"), 1, pack_str);
}
|
55
|
+
|
56
|
+
void Init_libbin_c() {
|
57
|
+
ID id;
|
58
|
+
VALUE mod;
|
59
|
+
id = rb_intern("LibBin");
|
60
|
+
mod = rb_const_get(rb_cObject, id);
|
61
|
+
rb_define_module_function(mod, "half_from_string", half_from_string_p, 2);
|
62
|
+
rb_define_module_function(mod, "half_to_string", half_to_string_p, 2);
|
63
|
+
rb_define_module_function(mod, "pghalf_from_string", pghalf_from_string_p, 2);
|
64
|
+
rb_define_module_function(mod, "pghalf_to_string", pghalf_to_string_p, 2);
|
65
|
+
}
|
data/ext/libbin/pghalf.c
ADDED
@@ -0,0 +1,449 @@
|
|
1
|
+
// Branch-free implementation of half-precision (16 bit) floating point
|
2
|
+
// Copyright 2006 Mike Acton <macton@gmail.com>
|
3
|
+
// Copyright 2019 Brice Videau <brice.videau@gmail.com>
|
4
|
+
//
|
5
|
+
// Permission is hereby granted, free of charge, to any person obtaining a
|
6
|
+
// copy of this software and associated documentation files (the "Software"),
|
7
|
+
// to deal in the Software without restriction, including without limitation
|
8
|
+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
9
|
+
// and/or sell copies of the Software, and to permit persons to whom the
|
10
|
+
// Software is furnished to do so, subject to the following conditions:
|
11
|
+
//
|
12
|
+
// The above copyright notice and this permission notice shall be included
|
13
|
+
// in all copies or substantial portions of the Software.
|
14
|
+
//
|
15
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
// THE SOFTWARE
|
22
|
+
//
|
23
|
+
// PlatinumGames Half-precision floating point format
|
24
|
+
// ------------------------------------
|
25
|
+
//
|
26
|
+
// | Field | Last | First | Note
|
27
|
+
// |----------|------|-------|----------
|
28
|
+
// | Sign | 15 | 15 |
|
29
|
+
// | Exponent | 14 | 9 | Bias = 47
|
30
|
+
// | Mantissa | 8 | 0 |
|
31
|
+
//
|
32
|
+
// Compiling
|
33
|
+
// ---------
|
34
|
+
//
|
35
|
+
// Preferred compile flags for GCC:
|
36
|
+
// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
|
37
|
+
//
|
38
|
+
// This file is a C99 source file, intended to be compiled with a C99
|
39
|
+
// compliant compiler. However, for the moment it remains compatible
|
40
|
+
// with C++98. Therefore if you are using a compiler that poorly implements
|
41
|
+
// C standards (e.g. MSVC), it may be compiled as C++. This is not
|
42
|
+
// guaranteed for future versions.
|
43
|
+
//
|
44
|
+
|
45
|
+
#include "pghalf.h"
|
46
|
+
|
47
|
+
// Load immediate: returns its argument unchanged. Used to name 32-bit
// constants in the same call style as the other primitives.
static inline uint32_t _uint32_li( uint32_t a )
{
  const uint32_t value = a;

  return value;
}
|
52
|
+
|
53
|
+
// Decrement: a - 1 in 32-bit unsigned arithmetic (0 wraps to 0xFFFFFFFF).
static inline uint32_t _uint32_dec( uint32_t a )
{
  return a - UINT32_C( 1 );
}
|
58
|
+
|
59
|
+
// Increment: a + 1 in 32-bit unsigned arithmetic (0xFFFFFFFF wraps to 0).
static inline uint32_t _uint32_inc( uint32_t a )
{
  return a + UINT32_C( 1 );
}
|
64
|
+
|
65
|
+
// Complement: flips every one of the 32 bits of a.
static inline uint32_t _uint32_not( uint32_t a )
{
  return a ^ UINT32_C( 0xFFFFFFFF );
}
|
70
|
+
|
71
|
+
// Negate: two's-complement negation of a, i.e. (0 - a) modulo 2^32.
static inline uint32_t _uint32_neg( uint32_t a )
{
  return UINT32_C( 0 ) - a;
}
|
76
|
+
|
77
|
+
// Extend sign: replicate bit 31 of a across the whole word.
//
// Returns 0xFFFFFFFF when bit 31 is set and 0 otherwise.
//
// Computed entirely in unsigned arithmetic: casting to int32_t and shifting
// a negative value right is implementation-defined, whereas (0 - msb) is
// fully specified and yields the same mask.
static inline uint32_t _uint32_ext( uint32_t a )
{
  const uint32_t msb = a >> 31;

  return UINT32_C( 0 ) - msb;
}
|
82
|
+
|
83
|
+
// And: bitwise AND of two 32-bit words.
static inline uint32_t _uint32_and( uint32_t a, uint32_t b )
{
  const uint32_t common = a & b;

  return common;
}
|
88
|
+
|
89
|
+
// Exclusive Or: bitwise XOR of two 32-bit words.
static inline uint32_t _uint32_xor( uint32_t a, uint32_t b )
{
  const uint32_t differing = a ^ b;

  return differing;
}
|
94
|
+
|
95
|
+
// And with Complement: keeps the bits of a that are NOT set in b.
static inline uint32_t _uint32_andc( uint32_t a, uint32_t b )
{
  const uint32_t keep = ~b;

  return a & keep;
}
|
100
|
+
|
101
|
+
// Or: bitwise OR of two 32-bit words.
static inline uint32_t _uint32_or( uint32_t a, uint32_t b )
{
  const uint32_t merged = a | b;

  return merged;
}
|
106
|
+
|
107
|
+
// Shift Right Logical: a shifted right by sa bits, zero-filled from the top.
//
// sa must be in the range 0..31; like the C >> operator it wraps, any other
// count is undefined.
static inline uint32_t _uint32_srl( uint32_t a, int sa )
{
  const uint32_t shifted = a >> sa;

  return shifted;
}
|
112
|
+
|
113
|
+
// Shift Left Logical: a shifted left by sa bits; bits pushed past bit 31 are
// discarded.
//
// sa must be in the range 0..31; like the C << operator it wraps, any other
// count is undefined.
static inline uint32_t _uint32_sll( uint32_t a, int sa )
{
  const uint32_t shifted = a << sa;

  return shifted;
}
|
118
|
+
|
119
|
+
// Add: a + b modulo 2^32.
static inline uint32_t _uint32_add( uint32_t a, uint32_t b )
{
  const uint32_t sum = a + b;

  return sum;
}
|
124
|
+
|
125
|
+
// Subtract: a - b modulo 2^32, so a result "below zero" wraps around and has
// bit 31 set for small differences.
static inline uint32_t _uint32_sub( uint32_t a, uint32_t b )
{
  const uint32_t difference = a - b;

  return difference;
}
|
130
|
+
|
131
|
+
// Multiply: low 32 bits of a * b.
static inline uint32_t _uint32_mul( uint32_t a, uint32_t b )
{
  const uint32_t product = a * b;

  return product;
}
|
136
|
+
|
137
|
+
// Select on Sign bit: returns a when bit 31 of test is set, otherwise b.
//
// The choice is made with a mask rather than a branch: bit 31 of test is
// spread over the whole word, then used to keep a and clear b (or the
// reverse).
static inline uint32_t _uint32_sels( uint32_t test, uint32_t a, uint32_t b )
{
  const uint32_t take_a = UINT32_C( 0 ) - ( test >> 31 );

  return ( a & take_a ) | ( b & ~take_a );
}
|
147
|
+
|
148
|
+
// Select Bits on mask: for every bit position, takes the bit from a where
// mask has a 1 and from b where mask has a 0.
static inline uint32_t _uint32_selb( uint32_t mask, uint32_t a, uint32_t b )
{
  const uint32_t from_a = a & mask;
  const uint32_t from_b = b & ~mask;

  return from_a | from_b;
}
|
157
|
+
|
158
|
+
// Load Immediate: returns its argument unchanged. Used to name 16-bit
// constants in the same call style as the other primitives.
static inline uint16_t _uint16_li( uint16_t a )
{
  const uint16_t value = a;

  return value;
}
|
163
|
+
|
164
|
+
// Extend sign: replicate bit 15 of a across the whole 16-bit word.
//
// Returns 0xFFFF when bit 15 is set and 0 otherwise.
//
// Computed in unsigned arithmetic: casting to int16_t and shifting a
// negative value right is implementation-defined, whereas (0 - msb) is
// fully specified and yields the same mask.
static inline uint16_t _uint16_ext( uint16_t a )
{
  const unsigned msb = (unsigned)( a >> 15 );

  return (uint16_t)( 0u - msb );
}
|
169
|
+
|
170
|
+
// Negate: two's-complement negation of a, i.e. (0 - a) modulo 2^16.
static inline uint16_t _uint16_neg( uint16_t a )
{
  return (uint16_t)( 0u - a );
}
|
175
|
+
|
176
|
+
// Complement: flips every one of the 16 bits of a.
static inline uint16_t _uint16_not( uint16_t a )
{
  return (uint16_t)( a ^ 0xFFFF );
}
|
181
|
+
|
182
|
+
// Decrement: a - 1 truncated to 16 bits (0 wraps to 0xFFFF).
static inline uint16_t _uint16_dec( uint16_t a )
{
  return (uint16_t)( a - 1 );
}
|
187
|
+
|
188
|
+
// Shift Left Logical: a shifted left by sa bits, truncated to 16 bits.
//
// sa must be in the range 0..31.
//
// The shift is done on a uint32_t copy. Shifting the uint16_t directly would
// first promote it to signed int, and a result that does not fit in int
// (for example 0xFFFF << 16) is undefined behaviour.
static inline uint16_t _uint16_sll( uint16_t a, int sa )
{
  const uint32_t widened = a;

  return (uint16_t)( widened << sa );
}
|
193
|
+
|
194
|
+
// Shift Right Logical: a shifted right by sa bits, zero-filled from the top.
//
// sa must be a valid shift count for the C >> operator, which this wraps.
static inline uint16_t _uint16_srl( uint16_t a, int sa )
{
  return (uint16_t)( a >> sa );
}
|
199
|
+
|
200
|
+
// Add: a + b truncated to 16 bits.
static inline uint16_t _uint16_add( uint16_t a, uint16_t b )
{
  return (uint16_t)( a + b );
}
|
205
|
+
|
206
|
+
// Subtract: a - b truncated to 16 bits, so a result "below zero" wraps.
static inline uint16_t _uint16_sub( uint16_t a, uint16_t b )
{
  return (uint16_t)( a - b );
}
|
211
|
+
|
212
|
+
// And: bitwise AND of two 16-bit words.
static inline uint16_t _uint16_and( uint16_t a, uint16_t b )
{
  return (uint16_t)( a & b );
}
|
217
|
+
|
218
|
+
// Or: bitwise OR of two 16-bit words.
static inline uint16_t _uint16_or( uint16_t a, uint16_t b )
{
  return (uint16_t)( a | b );
}
|
223
|
+
|
224
|
+
// Exclusive Or: bitwise XOR of two 16-bit words.
static inline uint16_t _uint16_xor( uint16_t a, uint16_t b )
{
  return (uint16_t)( a ^ b );
}
|
229
|
+
|
230
|
+
// And with Complement: keeps the bits of a that are NOT set in b.
static inline uint16_t _uint16_andc( uint16_t a, uint16_t b )
{
  const uint16_t keep = (uint16_t)~b;

  return (uint16_t)( a & keep );
}
|
235
|
+
|
236
|
+
// And then Shift Right Logical: masks a with b, then shifts the result
// right by sa bits.
static inline uint16_t _uint16_andsrl( uint16_t a, uint16_t b, int sa )
{
  const uint16_t masked = (uint16_t)( a & b );

  return (uint16_t)( masked >> sa );
}
|
241
|
+
|
242
|
+
// Shift Right Logical then Mask: shifts a right by sa bits, then keeps only
// the bits selected by mask.
static inline uint16_t _uint16_srlm( uint16_t a, int sa, uint16_t mask )
{
  const uint16_t shifted = (uint16_t)( a >> sa );

  return (uint16_t)( shifted & mask );
}
|
247
|
+
|
248
|
+
// Add then Mask: adds a and b, then keeps only the bits selected by mask.
static inline uint16_t _uint16_addm( uint16_t a, uint16_t b, uint16_t mask )
{
  const unsigned sum = (unsigned)a + (unsigned)b;

  return (uint16_t)( sum & mask );
}
|
253
|
+
|
254
|
+
|
255
|
+
// Select on Sign bit: returns a when bit 15 of test is set, otherwise b.
//
// The choice is made with a mask rather than a branch: bit 15 of test is
// spread over the whole 16-bit word, then used to keep a and clear b (or
// the reverse).
static inline uint16_t _uint16_sels( uint16_t test, uint16_t a, uint16_t b )
{
  const uint16_t take_a = (uint16_t)( 0u - (unsigned)( test >> 15 ) );

  return (uint16_t)( ( a & take_a ) | ( b & ~take_a ) );
}
|
265
|
+
|
266
|
+
// Count Leading Zeros: number of consecutive zero bits starting at bit 31.
//
// Returns a value in 0..32; 32 is returned for x == 0.
static inline uint32_t _uint32_cntlz( uint32_t x )
{
#ifdef __GNUC__
  /* __builtin_clz is undefined for x == 0, so that case is answered before
   * the builtin is reached instead of being patched up afterwards. Testing
   * x directly also covers inputs above 0x80000000, for which the sign bit
   * of -x is clear and cannot serve as an "x is non-zero" flag. */
  return ( x != 0 ) ? (uint32_t)__builtin_clz( x ) : UINT32_C( 0x00000020 );
#else
  /* Portable fallback: smear the highest set bit into every lower position,
   * then count the zero bits that remain above it with a population count
   * of the complement. */
  const uint32_t x0  = _uint32_srl(  x,  1 );
  const uint32_t x1  = _uint32_or(   x,  x0 );
  const uint32_t x2  = _uint32_srl(  x1, 2 );
  const uint32_t x3  = _uint32_or(   x1, x2 );
  const uint32_t x4  = _uint32_srl(  x3, 4 );
  const uint32_t x5  = _uint32_or(   x3, x4 );
  const uint32_t x6  = _uint32_srl(  x5, 8 );
  const uint32_t x7  = _uint32_or(   x5, x6 );
  const uint32_t x8  = _uint32_srl(  x7, 16 );
  const uint32_t x9  = _uint32_or(   x7, x8 );
  const uint32_t xA  = _uint32_not(  x9 );
  const uint32_t xB  = _uint32_srl(  xA, 1 );
  const uint32_t xC  = _uint32_and(  xB, 0x55555555 );
  const uint32_t xD  = _uint32_sub(  xA, xC );
  const uint32_t xE  = _uint32_and(  xD, 0x33333333 );
  const uint32_t xF  = _uint32_srl(  xD, 2 );
  const uint32_t x10 = _uint32_and(  xF, 0x33333333 );
  const uint32_t x11 = _uint32_add(  xE, x10 );
  const uint32_t x12 = _uint32_srl(  x11, 4 );
  const uint32_t x13 = _uint32_add(  x11, x12 );
  const uint32_t x14 = _uint32_and(  x13, 0x0f0f0f0f );
  const uint32_t x15 = _uint32_srl(  x14, 8 );
  const uint32_t x16 = _uint32_add(  x14, x15 );
  const uint32_t x17 = _uint32_srl(  x16, 16 );
  const uint32_t x18 = _uint32_add(  x16, x17 );
  const uint32_t x19 = _uint32_and(  x18, 0x0000003f );
  return ( x19 );
#endif
}
|
307
|
+
|
308
|
+
// Count Leading Zeros (16-bit): number of consecutive zero bits starting at
// bit 15 of x.
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  // Count in a 32-bit word, then discard the 16 leading zeros introduced by
  // widening x.
  const uint16_t wide_count = (uint16_t)_uint32_cntlz( (uint32_t)x );
  const uint32_t narrow_count = _uint32_sub( wide_count, 16 );

  return (uint16_t)narrow_count;
#else
  // Smear the highest set bit into every lower position.
  const uint16_t shr1    = _uint16_srl( x, 1 );
  const uint16_t smear1  = _uint16_or( x, shr1 );
  const uint16_t shr2    = _uint16_srl( smear1, 2 );
  const uint16_t smear2  = _uint16_or( smear1, shr2 );
  const uint16_t shr4    = _uint16_srl( smear2, 4 );
  const uint16_t smear4  = _uint16_or( smear2, shr4 );
  const uint16_t shr8    = _uint16_srl( smear4, 8 );
  const uint16_t smear8  = _uint16_or( smear4, shr8 );

  // The complement has a 1 for each leading zero of x; count those ones
  // with pairwise, nibble-wise and byte-wise partial sums.
  const uint16_t zeros   = _uint16_not( smear8 );
  const uint16_t odd     = _uint16_srlm( zeros, 1, 0x5555 );
  const uint16_t pairs   = _uint16_sub( zeros, odd );
  const uint16_t pair_lo = _uint16_and( pairs, 0x3333 );
  const uint16_t pair_hi = _uint16_srlm( pairs, 2, 0x3333 );
  const uint16_t nibbles = _uint16_add( pair_lo, pair_hi );
  const uint16_t nib_hi  = _uint16_srl( nibbles, 4 );
  const uint16_t bytes   = _uint16_addm( nibbles, nib_hi, 0x0f0f );
  const uint16_t byte_hi = _uint16_srl( bytes, 8 );
  const uint16_t total   = _uint16_addm( bytes, byte_hi, 0x001f );

  return total;
#endif
}
|
337
|
+
|
338
|
+
// Convert an IEEE-754 single-precision bit pattern to the 16-bit
// PlatinumGames half format (bit 15 sign, bits 14-9 exponent with bias 47,
// bits 8-0 mantissa).
//
// f: raw bits of the source float.
// Returns the raw bits of the converted half.
//
// Every candidate result (normal, mantissa overflow, NaN, infinity,
// denormal) is computed unconditionally; the final value is then picked
// with masks derived from the sign bit of "is_*_msb" test words, where a
// set bit 31 means the condition holds.
uint16_t
pghalf_from_float( uint32_t f )
{
  // Single-precision layout.
  const uint32_t f_s_mask          = UINT32_C( 0x80000000 ); // bit 31
  const uint32_t f_e_mask          = UINT32_C( 0x7f800000 ); // bits 30-23
  const uint32_t f_m_mask          = UINT32_C( 0x007fffff ); // bits 22-0
  const uint32_t f_m_hidden_bit    = UINT32_C( 0x00800000 ); // 1 << f_e_pos
  const uint32_t f_m_round_bit     = UINT32_C( 0x00002000 ); // top mantissa bit lost by narrowing
  const uint32_t f_snan_mask       = UINT32_C( 0x7fc00000 ); // exponent all ones + mantissa bit 22
  const int      f_e_pos           = 23;

  // Half layout.
  const uint32_t h_e_mask          = UINT32_C( 0x00007e00 ); // bits 14-9
  const uint32_t h_snan_mask       = UINT32_C( 0x00007f00 ); // h_e_mask + top mantissa bit
  const uint32_t h_nan_min         = UINT32_C( 0x00007e01 ); // h_e_mask + 1
  const uint32_t h_e_mask_value    = UINT32_C( 0x0000003f ); // h_e_mask >> h_e_pos
  const int      h_e_pos           = 9;

  // Differences between the two layouts.
  const int      f_h_s_pos_offset  = 16;                     // f_s_pos - h_s_pos
  const int      f_h_m_pos_offset  = 14;                     // f_e_pos - h_e_pos
  const uint32_t f_h_bias_offset   = UINT32_C( 0x00000050 ); // f_bias - h_bias
  const uint32_t f_h_e_biased_flag = UINT32_C( 0x000000af ); // f_bias + h_bias + 1

  // Split the source into fields.
  const uint32_t h_s           = ( f & f_s_mask ) >> f_h_s_pos_offset;
  const uint32_t f_m           = f & f_m_mask;
  const uint32_t f_e_amount    = ( f & f_e_mask ) >> f_e_pos;
  const uint32_t f_snan        = f & f_snan_mask;

  // Exponent re-expressed with the half bias; wraps "negative" when the
  // value is too small for a normal half.
  const uint32_t f_e_half_bias = f_e_amount - f_h_bias_offset;

  // Round by adding the first dropped mantissa bit one position higher.
  const uint32_t f_m_rounded          = f_m + ( ( f_m & f_m_round_bit ) << 1 );
  const uint32_t f_m_rounded_overflow = f_m_rounded & f_m_hidden_bit;

  // Denormal candidate: restore the hidden bit and shift it down by the
  // exponent deficit. The raw count is only within 0..31 for a narrow band
  // of exponents, and shifting a 32-bit value by any other count is
  // undefined, so it is clamped to 31. The shifted value is below 2^24, so
  // a shift of 31 yields 0: magnitudes too small for a denormal flush to
  // zero, and for non-denormal inputs the candidate is discarded anyway.
  const uint32_t f_m_denorm_sa_raw = UINT32_C( 1 ) - f_e_half_bias;
  const uint32_t f_m_denorm_sa     = ( f_m_denorm_sa_raw < 32 ) ? f_m_denorm_sa_raw : 31;
  const uint32_t f_m_with_hidden   = f_m_rounded | f_m_hidden_bit;
  const uint32_t h_m_denorm        = ( f_m_with_hidden >> f_m_denorm_sa ) >> f_h_m_pos_offset;

  // NaN candidate: all-ones exponent plus the truncated mantissa.
  const uint32_t m_nan    = f_m >> f_h_m_pos_offset;
  const uint32_t h_em_nan = h_e_mask | m_nan;

  // Normal candidates: plain, and with the exponent bumped for the case
  // where rounding carried out of the mantissa.
  const uint32_t h_e_norm_overflow = ( f_e_half_bias + 1 ) << h_e_pos;
  const uint32_t h_em_norm         = ( f_e_half_bias << h_e_pos )
                                   | ( f_m_rounded >> f_h_m_pos_offset );

  // Condition words: bit 31 set means the condition is true.
  const uint32_t is_h_denorm_msb        = ~( f_h_bias_offset - f_e_amount );
  // NOTE(review): f_e_amount is at most 255, so this difference never has
  // bit 31 set and the two selects driven by it appear never to fire;
  // kept as-is to preserve the existing results -- confirm intent.
  const uint32_t is_f_e_flagged_msb     = f_h_e_biased_flag - f_e_half_bias;
  const uint32_t is_f_inf_msb           = is_f_e_flagged_msb & ( f_m - 1 );
  const uint32_t is_f_nan_underflow_msb = is_f_e_flagged_msb & ( m_nan - 1 );
  const uint32_t is_e_overflow_msb      = h_e_mask_value - f_e_half_bias;
  const uint32_t is_h_inf_msb           = is_e_overflow_msb | is_f_inf_msb;
  const uint32_t is_f_snan_msb          = ~( f_snan - f_snan_mask );
  const uint32_t is_m_norm_overflow_msb = UINT32_C( 0 ) - f_m_rounded_overflow;

  // Selection chain; later selects override earlier ones. "pick" is all
  // ones when the condition's bit 31 is set and zero otherwise.
  uint32_t pick;
  uint32_t h_em;

  pick = UINT32_C( 0 ) - ( is_m_norm_overflow_msb >> 31 );
  h_em = ( h_e_norm_overflow & pick ) | ( h_em_norm & ~pick );

  pick = UINT32_C( 0 ) - ( is_f_e_flagged_msb >> 31 );
  h_em = ( h_em_nan & pick ) | ( h_em & ~pick );

  pick = UINT32_C( 0 ) - ( is_f_nan_underflow_msb >> 31 );
  h_em = ( h_nan_min & pick ) | ( h_em & ~pick );

  pick = UINT32_C( 0 ) - ( is_h_inf_msb >> 31 );
  h_em = ( h_e_mask & pick ) | ( h_em & ~pick );

  pick = UINT32_C( 0 ) - ( is_h_denorm_msb >> 31 );
  h_em = ( h_m_denorm & pick ) | ( h_em & ~pick );

  pick = UINT32_C( 0 ) - ( is_f_snan_msb >> 31 );
  h_em = ( h_snan_mask & pick ) | ( h_em & ~pick );

  return (uint16_t)( h_s | h_em );
}
|
402
|
+
|
403
|
+
// Convert a 16-bit PlatinumGames half (bit 15 sign, bits 14-9 exponent,
// bits 8-0 mantissa) to an IEEE-754 single-precision bit pattern.
//
// h: raw bits of the source half.
// Returns the raw bits of the resulting float.
//
// All candidate results (normal, zero, denormal, infinity, NaN) are computed
// unconditionally; the final value is picked with masks derived from the
// sign bit of "*_msb" test words, where a set bit 31 means the condition
// holds.
uint32_t
pghalf_to_float( uint16_t h )
{
  // Half layout.
  const uint32_t half_exp_mask   = UINT32_C( 0x00007e00 ); // bits 14-9
  const uint32_t half_man_mask   = UINT32_C( 0x000001ff ); // bits 8-0
  const uint32_t half_sign_mask  = UINT32_C( 0x00008000 ); // bit 15
  const uint32_t half_exp_below_max = UINT32_C( 0x00007dff ); // largest value under an all-ones exponent

  // Single-precision layout.
  const uint32_t float_exp_mask  = UINT32_C( 0x7f800000 ); // bits 30-23
  const uint32_t float_man_mask  = UINT32_C( 0x007fffff ); // bits 22-0
  const int      float_exp_pos   = 23;

  // Differences between the two layouts.
  const int      sign_shift      = 16;                     // f_s_pos - h_s_pos
  const int      field_shift     = 14;                     // f_m_bitcount - h_m_bitcount
  const uint32_t rebias          = UINT32_C( 0x0000a000 ); // (f_bias - h_bias) << 9
  const uint32_t denorm_exp_base = UINT32_C( 0x0000005f ); // field_shift + 1 + (f_bias - h_bias)
  const uint32_t float_exp_bits  = UINT32_C( 0x00000008 ); // float exponent bit count

  // Split the source into fields (each kept in its original bit position).
  const uint32_t src      = h;
  const uint32_t exp_bits = src & half_exp_mask;
  const uint32_t man_bits = src & half_man_mask;
  const uint32_t sign_out = ( src & half_sign_mask ) << sign_shift;

  // Normal candidate: re-bias the exponent and widen both fields.
  const uint32_t man_out    = man_bits << field_shift;
  const uint32_t normal_out = ( ( exp_bits + rebias ) << field_shift ) | man_out;

  // Denormal candidate: shift the mantissa until its leading one lands on
  // the float's hidden-bit position, and lower the exponent to match.
  const uint32_t lead_zeros     = _uint32_cntlz( man_bits );
  const uint32_t normalise_sa   = lead_zeros - float_exp_bits;
  const uint32_t denorm_exp     = ( denorm_exp_base - normalise_sa ) << float_exp_pos;
  const uint32_t denorm_man     = ( man_bits << normalise_sa ) & float_man_mask;
  const uint32_t denorm_out     = denorm_exp | denorm_man;

  // NaN candidate: all-ones exponent plus the widened mantissa.
  const uint32_t nan_out = float_exp_mask | man_out;

  // Condition words: bit 31 set means the condition is true.
  const uint32_t exp_is_zero_msb = exp_bits - 1;
  const uint32_t man_nonzero_msb = UINT32_C( 0 ) - man_bits;
  const uint32_t exp_is_max_msb  = half_exp_below_max - exp_bits;
  const uint32_t zero_msb        = exp_is_zero_msb & ~man_nonzero_msb;
  const uint32_t inf_msb         = exp_is_max_msb & ~man_nonzero_msb;
  const uint32_t denorm_msb      = man_nonzero_msb & exp_is_zero_msb;
  const uint32_t nan_msb         = exp_is_max_msb & man_nonzero_msb;

  // Selection chain; later selects override earlier ones. "pick" is all
  // ones when the condition's bit 31 is set and zero otherwise.
  uint32_t pick;
  uint32_t out;

  pick = UINT32_C( 0 ) - ( zero_msb >> 31 );
  out  = normal_out & ~pick;

  pick = UINT32_C( 0 ) - ( denorm_msb >> 31 );
  out  = ( denorm_out & pick ) | ( out & ~pick );

  pick = UINT32_C( 0 ) - ( inf_msb >> 31 );
  out  = ( float_exp_mask & pick ) | ( out & ~pick );

  pick = UINT32_C( 0 ) - ( nan_msb >> 31 );
  out  = ( nan_out & pick ) | ( out & ~pick );

  return sign_out | out;
}
|