libbin 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/libbin/half.h ADDED
@@ -0,0 +1,18 @@
1
+ #ifndef HALF_H
2
+ #define HALF_H
3
+
4
+ #include <stdint.h>
5
+
6
+ uint32_t half_to_float( uint16_t h );
7
+ uint16_t half_from_float( uint32_t f );
8
+ uint16_t half_add( uint16_t arg0, uint16_t arg1 );
9
+ uint16_t half_mul( uint16_t arg0, uint16_t arg1 );
10
+
11
/*
 * Subtract two half-precision values given as raw 16-bit patterns.
 *
 * ha  minuend
 * hb  subtrahend
 *
 * a - b is evaluated as a + (-b): bit 15 (0x8000) of hb is toggled and the
 * result is handed to half_add() together with ha.
 */
static inline uint16_t
half_sub( uint16_t ha, uint16_t hb )
{
  const uint16_t hb_negated = (uint16_t)( hb ^ 0x8000 );

  return half_add( ha, hb_negated );
}
17
+
18
+ #endif /* HALF_H */
@@ -0,0 +1,65 @@
1
+ #include "ruby.h"
2
+ #include "./half.h"
3
+ #include "./pghalf.h"
4
+
5
/*
 * Overlay of a float and a 32-bit unsigned integer sharing the same
 * storage.  The wrappers below write one member and read the other to move
 * between a float value and its raw bit pattern.
 */
union float_u {
  float    f;  /* storage viewed as a single-precision value */
  uint32_t i;  /* the same storage viewed as raw bits */
};
9
+
10
+ static VALUE pghalf_from_string_p(VALUE self, VALUE str, VALUE pack_str) {
11
+ Check_Type(str, T_STRING);
12
+ Check_Type(pack_str, T_STRING);
13
+ VALUE arr = rb_funcall(str, rb_intern("unpack"), 1, pack_str);
14
+ uint16_t val = NUM2USHORT(rb_funcall(arr, rb_intern("first"), 0));
15
+ union float_u res;
16
+
17
+ res.i = pghalf_to_float(val);
18
+ return DBL2NUM(res.f);
19
+ }
20
+
21
+ static VALUE half_from_string_p(VALUE self, VALUE str, VALUE pack_str) {
22
+ Check_Type(str, T_STRING);
23
+ Check_Type(pack_str, T_STRING);
24
+ VALUE arr = rb_funcall(str, rb_intern("unpack"), 1, pack_str);
25
+ uint16_t val = NUM2USHORT(rb_funcall(arr, rb_intern("first"), 0));
26
+ union float_u res;
27
+
28
+ res.i = half_to_float(val);
29
+ return DBL2NUM(res.f);
30
+ }
31
+
32
+ static VALUE pghalf_to_string_p(VALUE self, VALUE number, VALUE pack_str) {
33
+ Check_Type(number, T_FLOAT);
34
+ union float_u val;
35
+ uint16_t res;
36
+
37
+ val.f = NUM2DBL(number);
38
+ res = pghalf_from_float(val.i);
39
+ VALUE arr = rb_ary_new3(1, UINT2NUM(res) );
40
+
41
+ return rb_funcall(arr, rb_intern("pack"), 1, pack_str);
42
+ }
43
+
44
+ static VALUE half_to_string_p(VALUE self, VALUE number, VALUE pack_str) {
45
+ Check_Type(number, T_FLOAT);
46
+ union float_u val;
47
+ uint16_t res;
48
+
49
+ val.f = NUM2DBL(number);
50
+ res = half_from_float(val.i);
51
+ VALUE arr = rb_ary_new3(1, UINT2NUM(res) );
52
+
53
+ return rb_funcall(arr, rb_intern("pack"), 1, pack_str);
54
+ }
55
+
56
+ void Init_libbin_c() {
57
+ ID id;
58
+ VALUE mod;
59
+ id = rb_intern("LibBin");
60
+ mod = rb_const_get(rb_cObject, id);
61
+ rb_define_module_function(mod, "half_from_string", half_from_string_p, 2);
62
+ rb_define_module_function(mod, "half_to_string", half_to_string_p, 2);
63
+ rb_define_module_function(mod, "pghalf_from_string", pghalf_from_string_p, 2);
64
+ rb_define_module_function(mod, "pghalf_to_string", pghalf_to_string_p, 2);
65
+ }
@@ -0,0 +1,449 @@
1
+ // Branch-free implementation of half-precision (16 bit) floating point
2
+ // Copyright 2006 Mike Acton <macton@gmail.com>
3
+ // Copyright 2019 Brice Videau <brice.videau@gmail.com>
4
+ //
5
+ // Permission is hereby granted, free of charge, to any person obtaining a
6
+ // copy of this software and associated documentation files (the "Software"),
7
+ // to deal in the Software without restriction, including without limitation
8
+ // the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ // and/or sell copies of the Software, and to permit persons to whom the
10
+ // Software is furnished to do so, subject to the following conditions:
11
+ //
12
+ // The above copyright notice and this permission notice shall be included
13
+ // in all copies or substantial portions of the Software.
14
+ //
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE
22
+ //
23
+ // PlatinumGames Half-precision floating point format
24
+ // ------------------------------------
25
+ //
26
+ // | Field | Last | First | Note
27
+ // |----------|------|-------|----------
28
+ // | Sign | 15 | 15 |
29
+ // | Exponent | 14 | 9 | Bias = 47
30
+ // | Mantissa | 8 | 0 |
31
+ //
32
+ // Compiling
33
+ // ---------
34
+ //
35
+ // Preferred compile flags for GCC:
36
+ // -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
37
+ //
38
+ // This file is a C99 source file, intended to be compiled with a C99
39
+ // compliant compiler. However, for the moment it remains compatible
40
+ // with C++98. Therefore if you are using a compiler that poorly implements
41
+ // C standards (e.g. MSVC), it may be compiled as C++. This is not
42
+ // guaranteed for future versions.
43
+ //
44
+
45
+ #include "pghalf.h"
46
+
47
// Load immediate: hands back its argument unchanged.
static inline uint32_t _uint32_li( uint32_t a )
{
  return a;
}

// Decrement: a - 1, wrapping modulo 2^32 (0 becomes 0xffffffff).
static inline uint32_t _uint32_dec( uint32_t a )
{
  const uint32_t result = a - 1;

  return result;
}

// Increment: a + 1, wrapping modulo 2^32 (0xffffffff becomes 0).
static inline uint32_t _uint32_inc( uint32_t a )
{
  const uint32_t result = a + 1;

  return result;
}

// Complement: every bit of a inverted.
static inline uint32_t _uint32_not( uint32_t a )
{
  const uint32_t result = ~a;

  return result;
}

// Negate: unsigned negation of a (modulo 2^32).
static inline uint32_t _uint32_neg( uint32_t a )
{
  const uint32_t result = -a;

  return result;
}
76
+
77
// Extend sign: returns 0xffffffff when bit 31 of a is set and 0 otherwise.
//
// Done entirely in unsigned arithmetic (0 - msb) because right-shifting a
// negative signed value is implementation-defined in C.
static inline uint32_t _uint32_ext( uint32_t a )
{
  const uint32_t msb = a >> 31;

  return (uint32_t)0 - msb;
}
82
+
83
// And: bits set in both a and b.
static inline uint32_t _uint32_and( uint32_t a, uint32_t b )
{
  const uint32_t result = a & b;

  return result;
}

// Exclusive Or: bits set in exactly one of a and b.
static inline uint32_t _uint32_xor( uint32_t a, uint32_t b )
{
  const uint32_t result = a ^ b;

  return result;
}

// And with Complement: bits of a that are not set in b.
static inline uint32_t _uint32_andc( uint32_t a, uint32_t b )
{
  const uint32_t not_b = ~b;

  return a & not_b;
}

// Or: bits set in a, in b, or in both.
static inline uint32_t _uint32_or( uint32_t a, uint32_t b )
{
  const uint32_t result = a | b;

  return result;
}

// Shift Right Logical: a shifted right by sa bits, zero-filled.
static inline uint32_t _uint32_srl( uint32_t a, int sa )
{
  const uint32_t result = a >> sa;

  return result;
}

// Shift Left Logical: a shifted left by sa bits, zero-filled.
static inline uint32_t _uint32_sll( uint32_t a, int sa )
{
  const uint32_t result = a << sa;

  return result;
}

// Add: a + b modulo 2^32.
static inline uint32_t _uint32_add( uint32_t a, uint32_t b )
{
  const uint32_t result = a + b;

  return result;
}

// Subtract: a - b modulo 2^32.
static inline uint32_t _uint32_sub( uint32_t a, uint32_t b )
{
  const uint32_t result = a - b;

  return result;
}

// Multiply: low 32 bits of a * b.
static inline uint32_t _uint32_mul( uint32_t a, uint32_t b )
{
  const uint32_t result = a * b;

  return result;
}
136
+
137
// Select on Sign bit: yields a when bit 31 of test is set, b otherwise.
// The sign bit is first spread into a full-width mask, which then picks
// a's bits and the complementary bits of b.
static inline uint32_t _uint32_sels( uint32_t test, uint32_t a, uint32_t b )
{
  const uint32_t sign_mask = _uint32_ext( test );

  return _uint32_or( _uint32_and( a, sign_mask ),
                     _uint32_andc( b, sign_mask ) );
}

// Select Bits on mask: for each bit position, takes the bit from a where
// mask is 1 and from b where mask is 0.
static inline uint32_t _uint32_selb( uint32_t mask, uint32_t a, uint32_t b )
{
  const uint32_t from_a = _uint32_and( a, mask );
  const uint32_t from_b = _uint32_andc( b, mask );

  return _uint32_or( from_a, from_b );
}
157
+
158
// Load Immediate: hands back its 16-bit argument unchanged.
static inline uint16_t _uint16_li( uint16_t a )
{
  const uint16_t value = a;

  return value;
}
163
+
164
// Extend sign: returns 0xffff when bit 15 of a is set and 0 otherwise.
//
// Done in unsigned arithmetic (0 - msb, truncated to 16 bits): converting
// an out-of-range value to int16_t and right-shifting a negative signed
// value are both implementation-defined in C.
static inline uint16_t _uint16_ext( uint16_t a )
{
  const unsigned int msb = (unsigned int)a >> 15;

  return (uint16_t)( 0u - msb );
}
169
+
170
// Negate: two's-complement negation of a, truncated to 16 bits.
static inline uint16_t _uint16_neg( uint16_t a )
{
  return (uint16_t)( -a );
}

// Complement: every bit of a inverted, truncated to 16 bits.
static inline uint16_t _uint16_not( uint16_t a )
{
  return (uint16_t)( ~a );
}

// Decrement: a - 1, truncated to 16 bits (0 becomes 0xffff).
static inline uint16_t _uint16_dec( uint16_t a )
{
  return (uint16_t)( a - 1 );
}

// Shift Left Logical: a shifted left by sa bits, truncated to 16 bits.
static inline uint16_t _uint16_sll( uint16_t a, int sa )
{
  return (uint16_t)( a << sa );
}

// Shift Right Logical: a shifted right by sa bits.
static inline uint16_t _uint16_srl( uint16_t a, int sa )
{
  return (uint16_t)( a >> sa );
}

// Add: a + b, truncated to 16 bits.
static inline uint16_t _uint16_add( uint16_t a, uint16_t b )
{
  return (uint16_t)( a + b );
}

// Subtract: a - b, truncated to 16 bits.
static inline uint16_t _uint16_sub( uint16_t a, uint16_t b )
{
  return (uint16_t)( a - b );
}

// And: bits set in both a and b.
static inline uint16_t _uint16_and( uint16_t a, uint16_t b )
{
  return (uint16_t)( a & b );
}

// Or: bits set in a, in b, or in both.
static inline uint16_t _uint16_or( uint16_t a, uint16_t b )
{
  return (uint16_t)( a | b );
}

// Exclusive Or: bits set in exactly one of a and b.
static inline uint16_t _uint16_xor( uint16_t a, uint16_t b )
{
  return (uint16_t)( a ^ b );
}

// And with Complement: bits of a that are not set in b.
static inline uint16_t _uint16_andc( uint16_t a, uint16_t b )
{
  return (uint16_t)( a & ~b );
}

// And then Shift Right Logical: (a & b) shifted right by sa bits.
static inline uint16_t _uint16_andsrl( uint16_t a, uint16_t b, int sa )
{
  return (uint16_t)( ( a & b ) >> sa );
}

// Shift Right Logical then Mask: (a >> sa) restricted to the bits of mask.
static inline uint16_t _uint16_srlm( uint16_t a, int sa, uint16_t mask )
{
  return (uint16_t)( ( a >> sa ) & mask );
}

// Add then Mask: (a + b) restricted to the bits of mask.
static inline uint16_t _uint16_addm( uint16_t a, uint16_t b, uint16_t mask )
{
  return (uint16_t)( ( a + b ) & mask );
}
253
+
254
+
255
// Select on Sign bit: yields a when bit 15 of test is set, b otherwise.
// The sign bit is first spread into a 16-bit mask, which then picks a's
// bits and the complementary bits of b.
static inline uint16_t _uint16_sels( uint16_t test, uint16_t a, uint16_t b )
{
  const uint16_t sign_mask = _uint16_ext( test );

  return _uint16_or( _uint16_and( a, sign_mask ),
                     _uint16_andc( b, sign_mask ) );
}
265
+
266
// Count Leading Zeros: number of zero bits above the highest set bit of x.
// Returns 32 for x == 0 and 0 when bit 31 is set.  Branch-free on both
// paths.
static inline uint32_t _uint32_cntlz( uint32_t x )
{
#ifdef __GNUC__
  /* (x | -x) has bit 31 set exactly when x != 0; testing -x alone misses
   * every x above 0x80000000.  Spread that bit into a full-width mask. */
  const uint32_t nez_mask = (uint32_t)0 - ( ( x | ( (uint32_t)0 - x ) ) >> 31 );
  /* __builtin_clz is undefined for 0, so it is given (x | 1): setting bit 0
   * never moves the highest set bit of a non-zero x, and for x == 0 the
   * value is discarded by the select below. */
  const uint32_t nlz = (uint32_t)__builtin_clz( x | (uint32_t)1 );

  return ( nlz & nez_mask ) | ( (uint32_t)0x00000020 & ~nez_mask );
#else
  /* Smear the highest set bit into every lower position... */
  uint32_t v = x;

  v |= v >> 1;
  v |= v >> 2;
  v |= v >> 4;
  v |= v >> 8;
  v |= v >> 16;

  /* ...so that the complement has ones exactly in the leading-zero
   * positions, then count those ones with a parallel bit count. */
  v = ~v;
  v = v - ( ( v >> 1 ) & 0x55555555 );
  v = ( v & 0x33333333 ) + ( ( v >> 2 ) & 0x33333333 );
  v = ( v + ( v >> 4 ) ) & 0x0f0f0f0f;
  v = v + ( v >> 8 );
  v = v + ( v >> 16 );

  return ( v & 0x0000003f );
#endif
}
307
+
308
// Count Leading Zeros of a 16-bit value.
//
// With a GNU-compatible compiler the 32-bit count of the zero-extended
// value is taken and the 16 extra leading zeros are subtracted.  Otherwise
// the highest set bit is smeared downwards and the ones of the complement
// are counted.
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  const uint16_t wide_count = (uint16_t)_uint32_cntlz( (uint32_t)x );
  const uint32_t narrow_count = _uint32_sub( wide_count, 16 );

  return (narrow_count);
#else
  /* Smear the highest set bit into every lower position. */
  const uint16_t smear1 = _uint16_or( x, _uint16_srl( x, 1 ) );
  const uint16_t smear2 = _uint16_or( smear1, _uint16_srl( smear1, 2 ) );
  const uint16_t smear4 = _uint16_or( smear2, _uint16_srl( smear2, 4 ) );
  const uint16_t smear8 = _uint16_or( smear4, _uint16_srl( smear4, 8 ) );

  /* Ones now mark the leading-zero positions; count them in parallel. */
  const uint16_t zeros   = _uint16_not( smear8 );
  const uint16_t pairs   = _uint16_sub( zeros, _uint16_srlm( zeros, 1, 0x5555 ) );
  const uint16_t nibbles = _uint16_add( _uint16_and( pairs, 0x3333 ),
                                        _uint16_srlm( pairs, 2, 0x3333 ) );
  const uint16_t bytes   = _uint16_addm( nibbles, _uint16_srl( nibbles, 4 ), 0x0f0f );
  const uint16_t total   = _uint16_addm( bytes, _uint16_srl( bytes, 8 ), 0x001f );

  return ( total );
#endif
}
337
+
338
// Convert a single-precision bit pattern to the 16-bit PlatinumGames half
// format (sign: bit 15, exponent: bits 14-9 with bias 47, mantissa: bits 8-0).
//
// f        raw bits of the float to convert
// returns  raw bits of the pghalf value
//
// The conversion is branch-free: every candidate result (normal, mantissa
// overflow, NaN, infinity, denormal, ...) is computed unconditionally and the
// final value is chosen by a chain of sign-bit selects, where each later
// select overrides the earlier ones.
uint16_t
pghalf_from_float( uint32_t f )
{
  const uint32_t one                        = _uint32_li( 0x00000001 );
  const uint32_t f_s_mask                   = _uint32_li( 0x80000000 );
  const uint32_t f_e_mask                   = _uint32_li( 0x7f800000 );
  const uint32_t f_m_mask                   = _uint32_li( 0x007fffff );
  const uint32_t f_m_hidden_bit             = _uint32_li( 0x00800000 );
  // 14 mantissa bits are dropped (f_h_m_pos_offset), so the rounding bit is
  // the most significant dropped one: bit 13.
  const uint32_t f_m_round_bit              = _uint32_li( 0x00002000 );
  const uint32_t f_snan_mask                = _uint32_li( 0x7fc00000 );
  const uint32_t f_e_pos                    = _uint32_li( 0x00000017 );
  const uint32_t h_e_pos                    = _uint32_li( 0x00000009 );
  const uint32_t h_e_mask                   = _uint32_li( 0x00007e00 );
  const uint32_t h_snan_mask                = _uint32_li( 0x00007f00 );
  const uint32_t h_e_mask_value             = _uint32_li( 0x0000003f );
  const uint32_t f_h_s_pos_offset           = _uint32_li( 0x00000010 );
  const uint32_t f_h_bias_offset            = _uint32_li( 0x00000050 );
  const uint32_t f_h_m_pos_offset           = _uint32_li( 0x0000000e );
  const uint32_t h_nan_min                  = _uint32_li( 0x00007e01 );
  const uint32_t f_h_e_biased_flag          = _uint32_li( 0x000000af );
  const uint32_t f_m_denorm_sa_max          = _uint32_li( 0x0000001f );
  const uint32_t f_m_denorm_sa_hi_pos       = _uint32_li( 0x00000005 );
  const uint32_t f_s                        = _uint32_and( f, f_s_mask );
  const uint32_t f_e                        = _uint32_and( f, f_e_mask );
  const uint16_t h_s                        = _uint32_srl( f_s, f_h_s_pos_offset );
  const uint32_t f_m                        = _uint32_and( f, f_m_mask );
  const uint16_t f_e_amount                 = _uint32_srl( f_e, f_e_pos );
  const uint32_t f_e_half_bias              = _uint32_sub( f_e_amount, f_h_bias_offset );
  const uint32_t f_snan                     = _uint32_and( f, f_snan_mask );
  const uint32_t f_m_round_mask             = _uint32_and( f_m, f_m_round_bit );
  const uint32_t f_m_round_offset           = _uint32_sll( f_m_round_mask, one );
  const uint32_t f_m_rounded                = _uint32_add( f_m, f_m_round_offset );
  // The raw denormal shift amount is 1 - f_e_half_bias.  It exceeds 31 for
  // very small inputs and wraps to a huge value for normal ones; shifting a
  // 32-bit value by that much is undefined.  Any amount with bits above the
  // low five set is replaced by 31, which already shifts the (< 2^25)
  // mantissa down to zero.
  const uint32_t f_m_denorm_sa_raw          = _uint32_sub( one, f_e_half_bias );
  const uint32_t f_m_denorm_sa_hi           = _uint32_srl( f_m_denorm_sa_raw, f_m_denorm_sa_hi_pos );
  const uint32_t is_sa_out_of_range_msb     = _uint32_neg( f_m_denorm_sa_hi );
  const uint32_t f_m_denorm_sa              = _uint32_sels( is_sa_out_of_range_msb, f_m_denorm_sa_max, f_m_denorm_sa_raw );
  const uint32_t f_m_with_hidden            = _uint32_or( f_m_rounded, f_m_hidden_bit );
  const uint32_t f_m_denorm                 = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
  const uint32_t h_m_denorm                 = _uint32_srl( f_m_denorm, f_h_m_pos_offset );
  const uint32_t f_m_rounded_overflow       = _uint32_and( f_m_rounded, f_m_hidden_bit );
  const uint32_t m_nan                      = _uint32_srl( f_m, f_h_m_pos_offset );
  const uint32_t h_em_nan                   = _uint32_or( h_e_mask, m_nan );
  const uint32_t h_e_norm_overflow_offset   = _uint32_inc( f_e_half_bias );
  const uint32_t h_e_norm_overflow          = _uint32_sll( h_e_norm_overflow_offset, h_e_pos );
  const uint32_t h_e_norm                   = _uint32_sll( f_e_half_bias, h_e_pos );
  const uint32_t h_m_norm                   = _uint32_srl( f_m_rounded, f_h_m_pos_offset );
  const uint32_t h_em_norm                  = _uint32_or( h_e_norm, h_m_norm );
  // The is_*_msb values carry their answer in bit 31 only.
  const uint32_t is_h_ndenorm_msb           = _uint32_sub( f_h_bias_offset, f_e_amount );
  const uint32_t is_f_e_flagged_msb         = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
  const uint32_t is_h_denorm_msb            = _uint32_not( is_h_ndenorm_msb );
  const uint32_t is_f_m_eqz_msb             = _uint32_dec( f_m );
  const uint32_t is_h_nan_eqz_msb           = _uint32_dec( m_nan );
  const uint32_t is_f_inf_msb               = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb );
  const uint32_t is_f_nan_underflow_msb     = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
  const uint32_t is_e_overflow_msb          = _uint32_sub( h_e_mask_value, f_e_half_bias );
  const uint32_t is_h_inf_msb               = _uint32_or( is_e_overflow_msb, is_f_inf_msb );
  const uint32_t is_f_nsnan_msb             = _uint32_sub( f_snan, f_snan_mask );
  const uint32_t is_m_norm_overflow_msb     = _uint32_neg( f_m_rounded_overflow );
  const uint32_t is_f_snan_msb              = _uint32_not( is_f_nsnan_msb );
  // Selection chain: each line keeps the previous result unless its flag is set.
  const uint32_t h_em_overflow_result       = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm );
  const uint32_t h_em_nan_result            = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result );
  const uint32_t h_em_nan_underflow_result  = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result );
  const uint32_t h_em_inf_result            = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result );
  const uint32_t h_em_denorm_result         = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result );
  const uint32_t h_em_snan_result           = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result );
  const uint32_t h_result                   = _uint32_or( h_s, h_em_snan_result );

  return (uint16_t)(h_result);
}
402
+
403
// Convert a 16-bit PlatinumGames half pattern (sign: bit 15, exponent:
// bits 14-9 with bias 47, mantissa: bits 8-0) to single-precision bits.
//
// h        raw bits of the pghalf value
// returns  raw bits of the equivalent float
//
// Branch-free: the normal, denormal, infinity and NaN encodings are all
// computed, then the right one is chosen with sign-bit selects.
uint32_t
pghalf_to_float( uint16_t h )
{
  // pghalf field masks
  const uint32_t half_sign_mask      = _uint32_li( 0x00008000 );
  const uint32_t half_exp_mask       = _uint32_li( 0x00007e00 );
  const uint32_t half_man_mask       = _uint32_li( 0x000001ff );
  const uint32_t half_exp_below_max  = _uint32_li( 0x00007dff );

  // float field masks and positions
  const uint32_t float_exp_mask      = _uint32_li( 0x7f800000 );
  const uint32_t float_man_mask      = _uint32_li( 0x007fffff );
  const uint32_t float_exp_pos       = _uint32_li( 0x00000017 );

  // distances between the two layouts
  const uint32_t sign_shift          = _uint32_li( 0x00000010 );
  const uint32_t field_shift         = _uint32_li( 0x0000000e );
  const uint32_t bias_delta_in_place = _uint32_li( 0x0000a000 );  // 80 << 9
  const uint32_t denorm_exp_base     = _uint32_li( 0x0000005f );
  const uint32_t denorm_sa_bias      = _uint32_li( 0x00000008 );

  // split the input into its three fields (left in place)
  const uint32_t half_sign           = _uint32_and( h, half_sign_mask );
  const uint32_t half_exp            = _uint32_and( h, half_exp_mask );
  const uint32_t half_man            = _uint32_and( h, half_man_mask );

  // normal number: re-bias the exponent, move both fields up
  const uint32_t float_sign          = _uint32_sll( half_sign, sign_shift );
  const uint32_t exp_rebiased        = _uint32_add( half_exp, bias_delta_in_place );
  const uint32_t float_exp           = _uint32_sll( exp_rebiased, field_shift );
  const uint32_t float_man           = _uint32_sll( half_man, field_shift );
  const uint32_t normal_em           = _uint32_or( float_exp, float_man );

  // denormal: renormalise using the mantissa's leading-zero count
  const uint32_t man_nlz             = _uint32_cntlz( half_man );
  const uint32_t renorm_sa           = _uint32_sub( man_nlz, denorm_sa_bias );
  const uint32_t denorm_exp          = _uint32_sub( denorm_exp_base, renorm_sa );
  const uint32_t denorm_man_shifted  = _uint32_sll( half_man, renorm_sa );
  const uint32_t denorm_man          = _uint32_and( denorm_man_shifted, float_man_mask );
  const uint32_t denorm_exp_in_place = _uint32_sll( denorm_exp, float_exp_pos );
  const uint32_t denorm_em           = _uint32_or( denorm_exp_in_place, denorm_man );

  // NaN: all-ones exponent, mantissa carried over
  const uint32_t nan_em              = _uint32_or( float_exp_mask, float_man );

  // classification; each *_msb flag carries its answer in bit 31 only
  const uint32_t exp_is_zero_msb     = _uint32_dec( half_exp );
  const uint32_t man_is_nonzero_msb  = _uint32_neg( half_man );
  const uint32_t exp_is_max_msb      = _uint32_sub( half_exp_below_max, half_exp );
  const uint32_t zero_msb            = _uint32_andc( exp_is_zero_msb, man_is_nonzero_msb );
  const uint32_t inf_msb             = _uint32_andc( exp_is_max_msb, man_is_nonzero_msb );
  const uint32_t denorm_msb          = _uint32_and( man_is_nonzero_msb, exp_is_zero_msb );
  const uint32_t nan_msb             = _uint32_and( exp_is_max_msb, man_is_nonzero_msb );

  // selection: start from the normal encoding (cleared for zero) and let
  // denormal, infinity and NaN override it in that order
  const uint32_t zero_mask           = _uint32_ext( zero_msb );
  const uint32_t after_zero          = _uint32_andc( normal_em, zero_mask );
  const uint32_t after_denorm        = _uint32_sels( denorm_msb, denorm_em, after_zero );
  const uint32_t after_inf           = _uint32_sels( inf_msb, float_exp_mask, after_denorm );
  const uint32_t after_nan           = _uint32_sels( nan_msb, nan_em, after_inf );

  return _uint32_or( float_sign, after_nan );
}
@@ -0,0 +1,9 @@
1
+ #ifndef PGHALF_H
2
+ #define PGHALF_H
3
+
4
+ #include <stdint.h>
5
+
6
+ uint32_t pghalf_to_float( uint16_t h );
7
+ uint16_t pghalf_from_float( uint32_t f );
8
+
9
+ #endif /* PGHALF_H */
@@ -218,18 +218,18 @@ module LibBin
218
218
  :E => l["E"],
219
219
  :G => l["G"],
220
220
  :"a*" => l["a*"],
221
- :half => [ lambda { |str| Flt::IEEE_binary16_BE::from_bytes(str).to(Float) },
222
- lambda { |v| Flt::IEEE_binary16_BE::new(v).to_bytes } ],
223
- :half_le => [ lambda { |str| Flt::IEEE_binary16_LE::from_bytes(str).to(Float) },
224
- lambda { |v| Flt::IEEE_binary16_LE::new(v).to_bytes } ],
225
- :half_be => [ lambda { |str| Flt::IEEE_binary16_BE::from_bytes(str).to(Float) },
226
- lambda { |v| Flt::IEEE_binary16_BE::new(v).to_bytes } ],
227
- :pghalf => [ lambda { |str| Flt::IEEE_binary16_pg_BE::from_bytes(str).to(Float) },
228
- lambda { |v| Flt::IEEE_binary16_pg_BE::new(v).to_bytes } ],
229
- :pghalf_le => [ lambda { |str| Flt::IEEE_binary16_pg_LE::from_bytes(str).to(Float) },
230
- lambda { |v| Flt::IEEE_binary16_pg_LE::new(v).to_bytes } ],
231
- :pghalf_be => [ lambda { |str| Flt::IEEE_binary16_pg_BE::from_bytes(str).to(Float) },
232
- lambda { |v| Flt::IEEE_binary16_pg_BE::new(v).to_bytes } ]
221
+ :half => [ lambda { |str| LibBin::half_from_string(str, "S>") },
222
+ lambda { |v| LibBin::half_to_string(v, "S>") } ],
223
+ :half_le => [ lambda { |str| LibBin::half_from_string(str, "S<") },
224
+ lambda { |v| LibBin::half_to_string(v, "S<") } ],
225
+ :half_be => [ lambda { |str| LibBin::half_from_string(str, "S>") },
226
+ lambda { |v| LibBin::half_to_string(v, "S>") } ],
227
+ :pghalf => [ lambda { |str| LibBin::pghalf_from_string(str, "S>") },
228
+ lambda { |v| LibBin::pghalf_to_string(v, "S>") } ],
229
+ :pghalf_le => [ lambda { |str| LibBin::pghalf_from_string(str, "S<") },
230
+ lambda { |v| LibBin::pghalf_to_string(v, "S<") } ],
231
+ :pghalf_be => [ lambda { |str| LibBin::pghalf_from_string(str, "S>") },
232
+ lambda { |v| LibBin::pghalf_to_string(v, "S>") } ]
233
233
  } )
234
234
  DATA_ENDIAN[false].merge!( {
235
235
  :c => l["c"],
@@ -263,18 +263,18 @@ module LibBin
263
263
  :E => l["E"],
264
264
  :G => l["G"],
265
265
  :"a*" => l["a*"],
266
- :half => [ lambda { |str| Flt::IEEE_binary16::from_bytes(str).to(Float) },
267
- lambda { |v| Flt::IEEE_binary16::new(v).to_bytes } ],
268
- :half_le => [ lambda { |str| Flt::IEEE_binary16_LE::from_bytes(str).to(Float) },
269
- lambda { |v| Flt::IEEE_binary16_LE::new(v).to_bytes } ],
270
- :half_be => [ lambda { |str| Flt::IEEE_binary16_BE::from_bytes(str).to(Float) },
271
- lambda { |v| Flt::IEEE_binary16_BE::new(v).to_bytes } ],
272
- :pghalf => [ lambda { |str| Flt::IEEE_binary16_pg::from_bytes(str).to(Float) },
273
- lambda { |v| Flt::IEEE_binary16_pg::new(v).to_bytes } ],
274
- :pghalf_le => [ lambda { |str| Flt::IEEE_binary16_pg_LE::from_bytes(str).to(Float) },
275
- lambda { |v| Flt::IEEE_binary16_pg_LE::new(v).to_bytes } ],
276
- :pghalf_be => [ lambda { |str| Flt::IEEE_binary16_pg_BE::from_bytes(str).to(Float) },
277
- lambda { |v| Flt::IEEE_binary16_pg_BE::new(v).to_bytes } ]
266
+ :half => [ lambda { |str| LibBin::half_from_string(str, "S<") },
267
+ lambda { |v| LibBin::half_to_string(v, "S<") } ],
268
+ :half_le => [ lambda { |str| LibBin::half_from_string(str, "S<") },
269
+ lambda { |v| LibBin::half_to_string(v, "S<") } ],
270
+ :half_be => [ lambda { |str| LibBin::half_from_string(str, "S>") },
271
+ lambda { |v| LibBin::half_to_string(v, "S>") } ],
272
+ :pghalf => [ lambda { |str| LibBin::pghalf_from_string(str, "S<") },
273
+ lambda { |v| LibBin::pghalf_to_string(v, "S<") } ],
274
+ :pghalf_le => [ lambda { |str| LibBin::pghalf_to_string(v, "S<") },
275
+ lambda { |v| LibBin::pghalf_to_string(v, "S<") } ],
276
+ :pghalf_be => [ lambda { |str| LibBin::pghalf_to_string(v, "S>") },
277
+ lambda { |v| LibBin::pghalf_to_string(v, "S>") } ]
278
278
  } )
279
279
 
280
280
 
@@ -292,7 +292,7 @@ module LibBin
292
292
  @symbol = symbol
293
293
  @size = DATA_SIZES[symbol]
294
294
  @rl_be, @sl_be = DATA_ENDIAN[true][symbol]
295
- @rl_le, @sl_be = DATA_ENDIAN[false][symbol]
295
+ @rl_le, @sl_le = DATA_ENDIAN[false][symbol]
296
296
  end
297
297
 
298
298
  def self.load(input, input_big = LibBin::default_big?, _ = nil, _ = nil)
@@ -305,7 +305,7 @@ module LibBin
305
305
  output.write(str)
306
306
  end
307
307
 
308
- def self.convert(input, output, input_big = LibBin::default_big?, output_big = !LibBin::default_big, _ = nil, _ = nil)
308
+ def self.convert(input, output, input_big = LibBin::default_big?, output_big = !input_big, _ = nil, _ = nil)
309
309
  str = input.read(@size)
310
310
  value = (input_big ? @rl_be[str] : @rl_le[str])
311
311
  str = (output_big ? @sl_be[value] : @sl_le[value])
data/lib/libbin.rb CHANGED
@@ -1,10 +1,6 @@
1
- warn_level = $VERBOSE
2
- $VERBOSE = nil
3
- require 'float-formats'
4
- $VERBOSE = warn_level
5
-
6
- Flt::IEEE.binary :IEEE_binary16_pg, significand: 9, exponent: 6, bias: 47
7
- Flt::IEEE.binary :IEEE_binary16_pg_BE, significand: 9, exponent: 6, bias: 47, endianness: :big_endian
1
+ module LibBin
2
+ end
3
+ require "libbin_c.so"
8
4
 
9
5
  require_relative 'libbin/alignment'
10
6
  require_relative 'libbin/data_types'
data/libbin.gemspec CHANGED
@@ -1,12 +1,13 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'libbin'
3
- s.version = "1.0.0"
3
+ s.version = "1.0.1"
4
4
  s.author = "Brice Videau"
5
5
  s.email = "brice.videau@imag.fr"
6
6
  s.homepage = "https://github.com/kerilk/libbin"
7
7
  s.summary = "Library for loading and converting binary files"
8
8
  s.description = "Read, write and convert Binary data in Ruby."
9
- s.files = Dir[ 'libbin.gemspec', 'LICENSE', 'lib/**/*.rb' ]
9
+ s.files = Dir[ 'libbin.gemspec', 'LICENSE', 'lib/**/*.rb', 'ext/libbin/extconf.rb', 'ext/libbin/*.c', 'ext/libbin/*.h' ]
10
+ s.extensions << 'ext/libbin/extconf.rb'
10
11
  s.has_rdoc = false
11
12
  s.license = 'BSD-2-Clause'
12
13
  s.required_ruby_version = '>= 2.0.0'