crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,496 @@
|
|
1
|
+
#ifndef __DOUBLE_EXTENDED_H
|
2
|
+
#define __DOUBLE_EXTENDED_H
|
3
|
+
|
4
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
|
5
|
+
#include <fpu_control.h>
|
6
|
+
typedef long double double_ext;
|
7
|
+
#endif
|
8
|
+
#if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)
|
9
|
+
typedef __fpreg double_ext;
|
10
|
+
#endif
|
11
|
+
|
12
|
+
|
13
|
+
/* For debugging */
|
14
|
+
typedef union {
|
15
|
+
int i[3];
|
16
|
+
long double d;
|
17
|
+
} db_ext_number;
|
18
|
+
|
19
|
+
|
20
|
+
#define DE_EXP 2
|
21
|
+
#define DE_MANTISSA_HI 1
|
22
|
+
#define DE_MANTISSA_LO 0
|
23
|
+
|
24
|
+
#define print_ext(_s, _y) {\
|
25
|
+
db_ext_number _yy; _yy.d=_y; \
|
26
|
+
printf("%s %04x %08x %08x \n",_s, 0xffff&_yy.i[DE_EXP], _yy.i[DE_MANTISSA_HI], _yy.i[DE_MANTISSA_LO]); \
|
27
|
+
}
|
28
|
+
|
29
|
+
/**************************************************************************************/
|
30
|
+
/*********************************Rounding tests***************************************/
|
31
|
+
/**************************************************************************************/
|
32
|
+
|
33
|
+
|
34
|
+
/* These test work by observing the bits of your double-extended after the 53rd.
|
35
|
+
|
36
|
+
mask should be 7ff if you trust your 64 bits (hum)
|
37
|
+
7fe if you trust 63 (if you have proven that maxepsilon<2^(-63) )
|
38
|
+
7fc 62
|
39
|
+
7f8 61
|
40
|
+
7f0 60 etc
|
41
|
+
*/
|
42
|
+
|
43
|
+
/* Mask constants for rounding test */
|
44
|
+
|
45
|
+
#define ACCURATE_TO_64_BITS 0x7ff
|
46
|
+
#define ACCURATE_TO_63_BITS 0x7fe
|
47
|
+
#define ACCURATE_TO_62_BITS 0x7fc
|
48
|
+
#define ACCURATE_TO_61_BITS 0x7f8
|
49
|
+
#define ACCURATE_TO_60_BITS 0x7f0
|
50
|
+
#define ACCURATE_TO_59_BITS 0x7e0
|
51
|
+
|
52
|
+
|
53
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
|
54
|
+
|
55
|
+
static const unsigned short RN_Double = (_FPU_DEFAULT & ~_FPU_EXTENDED)|_FPU_DOUBLE;
|
56
|
+
static const unsigned short RN_DoubleDown = (_FPU_DEFAULT & ~_FPU_EXTENDED & ~_FPU_RC_NEAREST)|_FPU_DOUBLE | _FPU_RC_DOWN;
|
57
|
+
static const unsigned short RN_DoubleUp = (_FPU_DEFAULT & ~_FPU_EXTENDED & ~_FPU_RC_NEAREST)|_FPU_DOUBLE | _FPU_RC_UP;
|
58
|
+
static const unsigned short RN_DoubleExt = _FPU_DEFAULT;
|
59
|
+
|
60
|
+
#define DOUBLE_EXTENDED_MODE _FPU_SETCW(RN_DoubleExt)
|
61
|
+
#define DOUBLE_UP_MODE _FPU_SETCW(RN_DoubleUp)
|
62
|
+
#define DOUBLE_DOWN_MODE _FPU_SETCW(RN_DoubleDown)
|
63
|
+
#define BACK_TO_DOUBLE_MODE _FPU_SETCW(RN_Double)
|
64
|
+
|
65
|
+
|
66
|
+
/*
|
67
|
+
Two rounding tests to the nearest. On Pentium 3, gcc3.3, the second
|
68
|
+
is faster by 12 cycles (and also improves the worst-case time by 60
|
69
|
+
cycles since it doesn't switch processor rounding mode in this
|
70
|
+
case). However it uses a coarser error estimation.
|
71
|
+
*/
|
72
|
+
|
73
|
+
#define DE_TEST_AND_RETURN_RN_ZIV(y,rncst) \
|
74
|
+
{ double yh, yl; \
|
75
|
+
yh = (double) y; \
|
76
|
+
yl = y-yh; \
|
77
|
+
BACK_TO_DOUBLE_MODE; \
|
78
|
+
if(yh==yh + yl*rncst) return yh; \
|
79
|
+
DOUBLE_EXTENDED_MODE; \
|
80
|
+
}
|
81
|
+
|
82
|
+
|
83
|
+
#define DE_TEST_AND_RETURN_RN(_y, _mask) \
|
84
|
+
{ \
|
85
|
+
db_ext_number _z; double _yd; \
|
86
|
+
int _lo; \
|
87
|
+
_z.d = _y; \
|
88
|
+
_yd = (double) _y; \
|
89
|
+
_lo = _z.i[DE_MANTISSA_LO] &(_mask); \
|
90
|
+
if((_lo!=(0x3ff&(_mask))) && (_lo!= (0x400&(_mask)))) { \
|
91
|
+
BACK_TO_DOUBLE_MODE; \
|
92
|
+
return _yd; \
|
93
|
+
} \
|
94
|
+
}
|
95
|
+
|
96
|
+
|
97
|
+
#define DE_TEST_AND_RETURN_RD(_y, _mask) \
|
98
|
+
{ \
|
99
|
+
double _result; int _bits; \
|
100
|
+
db_ext_number _z; \
|
101
|
+
_z.d = _y; \
|
102
|
+
DOUBLE_DOWN_MODE; \
|
103
|
+
_bits = _z.i[DE_MANTISSA_LO] &(_mask); \
|
104
|
+
_result = (double)(_y); \
|
105
|
+
if( (_bits != (0xfff&(_mask))) && (_bits != (0x000&(_mask))) ) { \
|
106
|
+
BACK_TO_DOUBLE_MODE; \
|
107
|
+
return _result; \
|
108
|
+
} \
|
109
|
+
DOUBLE_EXTENDED_MODE; \
|
110
|
+
}
|
111
|
+
#define DE_TEST_AND_RETURN_RU(_y, _mask) \
|
112
|
+
{ \
|
113
|
+
double _result; int _bits; \
|
114
|
+
db_ext_number _z; \
|
115
|
+
_z.d = _y; \
|
116
|
+
DOUBLE_UP_MODE; \
|
117
|
+
_bits = _z.i[DE_MANTISSA_LO] &(_mask); \
|
118
|
+
_result = (double)(_y); \
|
119
|
+
if( (_bits != (0xfff&(_mask))) && (_bits != (0x000&(_mask))) ) { \
|
120
|
+
BACK_TO_DOUBLE_MODE; \
|
121
|
+
return _result; \
|
122
|
+
} \
|
123
|
+
DOUBLE_EXTENDED_MODE; \
|
124
|
+
}
|
125
|
+
|
126
|
+
|
127
|
+
/* Use this one if you want a final computation step to overlap with
|
128
|
+
the rounding test. Examples: multiplication by a sign or by a power of 2 */
|
129
|
+
|
130
|
+
#define DE_TEST_AND_RETURN_RN2(_ytest, _yreturn, _mask) \
|
131
|
+
{ \
|
132
|
+
db_ext_number _z; double _y_return_d; \
|
133
|
+
int _bits; \
|
134
|
+
_z.d = _ytest; \
|
135
|
+
_y_return_d = (double) (_yreturn); \
|
136
|
+
_bits = _z.i[DE_MANTISSA_LO] &(_mask); \
|
137
|
+
if((_bits!=(0x3ff&(_mask))) && (_bits!= (0x400&(_mask)))) {\
|
138
|
+
BACK_TO_DOUBLE_MODE; \
|
139
|
+
return _y_return_d; \
|
140
|
+
} \
|
141
|
+
}
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
/* Slow macros with two changes of precision, but who cares, they are
|
147
|
+
used at the end of the second step */
|
148
|
+
#define RETURN_SUM_ROUNDED_DOWN(_rh, _rl) {\
|
149
|
+
double _result; \
|
150
|
+
DOUBLE_DOWN_MODE; \
|
151
|
+
_result = (_rh+_rl); \
|
152
|
+
BACK_TO_DOUBLE_MODE; \
|
153
|
+
return _result; \
|
154
|
+
}
|
155
|
+
|
156
|
+
#define RETURN_SUM_ROUNDED_UP(_rh, _rl) {\
|
157
|
+
double _result; \
|
158
|
+
DOUBLE_UP_MODE; \
|
159
|
+
_result = (_rh+_rl); \
|
160
|
+
BACK_TO_DOUBLE_MODE; \
|
161
|
+
return _result; \
|
162
|
+
}
|
163
|
+
|
164
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
165
|
+
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
#if !defined(CRLIBM_TYPECPU_ITANIUM)
|
171
|
+
#error "This file should be compiled only for IA32 or IA64 architecture "
|
172
|
+
#endif
|
173
|
+
|
174
|
+
/* TODO Add what it takes to compile under HP-UX */
|
175
|
+
#if !defined(__INTEL_COMPILER)
|
176
|
+
#error "Use icc, version 8.1 or higher to compile for IA64 architecture"
|
177
|
+
#endif
|
178
|
+
|
179
|
+
|
180
|
+
#define DOUBLE_EXTENDED_MODE {}
|
181
|
+
#define BACK_TO_DOUBLE_MODE {}
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
#define DE_TEST_AND_RETURN_RN(_y, _mask) \
|
188
|
+
{ uint64_t _mantissa, _bits; double _yd; \
|
189
|
+
_yd = _Asm_fma(2/*_FR_D*/, 1.0, _y, 0.0, 0/*SF0*/); \
|
190
|
+
_mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
|
191
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
192
|
+
if(__builtin_expect( \
|
193
|
+
(_bits!=(0x3ff&(_mask))) && (_bits != (0x400&(_mask))), \
|
194
|
+
1+1==2)) \
|
195
|
+
return _yd; \
|
196
|
+
}
|
197
|
+
|
198
|
+
|
199
|
+
/* Slower by 5 cycles as of 2005... Let us keep it, you never know */
|
200
|
+
#define DE_TEST_AND_RETURN_RN_ZIV(y,rncst) \
|
201
|
+
{ double yh, yl; \
|
202
|
+
yh = (double) y; \
|
203
|
+
yl = y-yh; \
|
204
|
+
if(__builtin_expect(yh == yh + yl*rncst, 1+1==2)) return yh; \
|
205
|
+
}
|
206
|
+
|
207
|
+
/* Use this one if you want a final computation step to overlap with
|
208
|
+
the rounding test. Examples: multiplication by a sign or by a power of 2 */
|
209
|
+
|
210
|
+
#define DE_TEST_AND_RETURN_RN2(_ytest, _yreturn, _mask) \
|
211
|
+
{ uint64_t _mantissa, _bits; \
|
212
|
+
_mantissa = _Asm_getf(4/*_FR_SIG*/, _ytest); \
|
213
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
214
|
+
if(__builtin_expect( \
|
215
|
+
(_bits!=(0x3ff&(_mask))) && (_bits != (0x400&(_mask))), \
|
216
|
+
1+1==2)) \
|
217
|
+
return _yreturn; \
|
218
|
+
}
|
219
|
+
|
220
|
+
|
221
|
+
#define DE_TEST_AND_RETURN_RD(_y, _mask) \
|
222
|
+
{ uint64_t _mantissa, _bits; double _yd; \
|
223
|
+
_yd = _Asm_fma(2/*_FR_D*/, -1.0, _y, 0.0, 3/*SF3*/); \
|
224
|
+
_mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
|
225
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
226
|
+
if(__builtin_expect( \
|
227
|
+
(_bits!=(0x000&(_mask))) && (_bits != (0x7ff&(_mask))), \
|
228
|
+
1+1==2)) \
|
229
|
+
return -_yd; \
|
230
|
+
}
|
231
|
+
|
232
|
+
#define DE_TEST_AND_RETURN_RU(_y, _mask) \
|
233
|
+
{ uint64_t _mantissa, _bits; double _yd; \
|
234
|
+
_yd = _Asm_fma(2/*_FR_D*/, 1.0, _y, 0.0, 3/*SF3*/); \
|
235
|
+
_mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
|
236
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
237
|
+
if(__builtin_expect( \
|
238
|
+
(_bits!=(0x000&(_mask))) && (_bits != (0x7ff&(_mask))), \
|
239
|
+
1+1==2)) \
|
240
|
+
return _yd; \
|
241
|
+
}
|
242
|
+
|
243
|
+
#define RETURN_SUM_ROUNDED_DOWN(_rh, _rl) \
|
244
|
+
return -_Asm_fma(2/*_FR_D*/, -1.0, _rh, -_rl, 3/*SF3*/);
|
245
|
+
|
246
|
+
#define RETURN_SUM_ROUNDED_UP(_rh, _rl) \
|
247
|
+
return _Asm_fma(2/*_FR_D*/, 1.0, _rh, _rl, 3/*SF3*/);
|
248
|
+
|
249
|
+
|
250
|
+
#if 0
|
251
|
+
|
252
|
+
/* This test doesn'use SF2 and SF3 Kept as a model for Pentium implementation, to erase afterwards */
|
253
|
+
|
254
|
+
#define DE_TEST_AND_RETURN_RU(_y, _mask) \
|
255
|
+
{ \
|
256
|
+
db_ext_number _z; double _yd; \
|
257
|
+
unsigned long int _mantissa, _y_double, _isNegative ,_wasRoundedUp, _bits; \
|
258
|
+
_yd=(double) _y; \
|
259
|
+
_mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
|
260
|
+
_y_double = _Asm_getf(2/*_FR_D*/, _yd); \
|
261
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
262
|
+
_wasRoundedUp = ((_mantissa >>11) & 1) != (_y_double & 1); \
|
263
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
264
|
+
_isNegative = _y_double >> 63; \
|
265
|
+
if(_isNegative) { \
|
266
|
+
if(_wasRoundedUp) { /* RN was RD */ \
|
267
|
+
if( _bits != (0x7ff&(_mask)) ) { \
|
268
|
+
_y_double--; \
|
269
|
+
return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
|
270
|
+
} /* else launch accurate phase */ \
|
271
|
+
} \
|
272
|
+
else{ /* RN was RU, so need to check */ \
|
273
|
+
if( _bits != (0x000&(_mask)) ) { \
|
274
|
+
return _yd; \
|
275
|
+
} /* else launch accurate phase */ \
|
276
|
+
} \
|
277
|
+
} \
|
278
|
+
else{ /* Positive number */ \
|
279
|
+
if(_wasRoundedUp) { /* RN was RU */ \
|
280
|
+
if( _bits != (0x7ff&(_mask)) ) { \
|
281
|
+
return _yd; \
|
282
|
+
} /* else launch accurate phase */ \
|
283
|
+
} \
|
284
|
+
else{ /* RN was RD, */ \
|
285
|
+
if( _bits != (0x000&(_mask)) ) { \
|
286
|
+
_y_double++; /* beware, does not work for -0 */ \
|
287
|
+
return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
|
288
|
+
} /* else launch accurate phase */ \
|
289
|
+
} \
|
290
|
+
} \
|
291
|
+
}
|
292
|
+
|
293
|
+
|
294
|
+
|
295
|
+
|
296
|
+
#define DE_TEST_AND_RETURN_RD(_y, _mask) \
|
297
|
+
{ \
|
298
|
+
db_ext_number _z; double _yd; \
|
299
|
+
unsigned long int _mantissa, _y_double, _isNegative ,_wasRoundedUp, _bits; \
|
300
|
+
_yd=(double) _y; \
|
301
|
+
_mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
|
302
|
+
_y_double = _Asm_getf(2/*_FR_D*/, _yd); \
|
303
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
304
|
+
_wasRoundedUp = ((_mantissa >>11) & 1) != (_y_double & 1); \
|
305
|
+
_bits = _mantissa & (0x7ff&(_mask)); \
|
306
|
+
_isNegative = _y_double >> 63; \
|
307
|
+
if(_isNegative) { \
|
308
|
+
if(_wasRoundedUp) { /* RN was RD */ \
|
309
|
+
if( _bits != (0x7ff&(_mask)) ) { \
|
310
|
+
return (double) _y; \
|
311
|
+
} /* else launch accurate phase */ \
|
312
|
+
} \
|
313
|
+
else{ /* RN was RU */ \
|
314
|
+
if( _bits != (0x000&(_mask)) ) { \
|
315
|
+
_y_double++; \
|
316
|
+
return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
|
317
|
+
} /* else launch accurate phase */ \
|
318
|
+
} \
|
319
|
+
} \
|
320
|
+
else{ /* Positive number */ \
|
321
|
+
if(_wasRoundedUp) { /* RN was RU */ \
|
322
|
+
if( _bits != (0x7ff&(_mask)) ) { \
|
323
|
+
_y_double--; \
|
324
|
+
return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
|
325
|
+
} /* else launch accurate phase */ \
|
326
|
+
} \
|
327
|
+
else{ /* RN was RD, */ \
|
328
|
+
if( _bits != (0x000&(_mask)) ) { \
|
329
|
+
return (double) _y; \
|
330
|
+
} /* else launch accurate phase */ \
|
331
|
+
} \
|
332
|
+
} \
|
333
|
+
}
|
334
|
+
|
335
|
+
#endif /* 0 */
|
336
|
+
|
337
|
+
|
338
|
+
|
339
|
+
#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
|
346
|
+
|
347
|
+
/**************************************************************************************/
|
348
|
+
/************************Double double-extended arithmetic*****************************/
|
349
|
+
/**************************************************************************************/
|
350
|
+
|
351
|
+
|
352
|
+
|
353
|
+
#define Add12_ext(prh, prl, a, b) \
|
354
|
+
{ \
|
355
|
+
double_ext _z, _a, _b; \
|
356
|
+
_a = a; _b = b; \
|
357
|
+
*prh = _a + _b; \
|
358
|
+
_z = *prh - _a; \
|
359
|
+
*prl = _b - _z; \
|
360
|
+
}
|
361
|
+
|
362
|
+
|
363
|
+
|
364
|
+
|
365
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
|
366
|
+
#define Mul12_ext(prh,prl,u,v) \
|
367
|
+
{ \
|
368
|
+
const double_ext _c = 4294967297.L; /* 2^32 +1 */ \
|
369
|
+
double_ext _up, _u1, _u2, _vp, _v1, _v2; \
|
370
|
+
double_ext _u =u, _v=v; \
|
371
|
+
\
|
372
|
+
_up = _u*_c; _vp = _v*_c; \
|
373
|
+
_u1 = (_u-_up)+_up; _v1 = (_v-_vp)+_vp; \
|
374
|
+
_u2 = _u-_u1; _v2 = _v-_v1; \
|
375
|
+
\
|
376
|
+
*prh = _u*_v; \
|
377
|
+
*prl = _u1*_v1 - *prh; \
|
378
|
+
*prl = *prl + _u1*_v2; \
|
379
|
+
*prl = *prl + _u2*_v1; \
|
380
|
+
*prl = *prl + _u2*_v2; \
|
381
|
+
}
|
382
|
+
|
383
|
+
#define Mul22_ext(prh,prl, ah,al, bh,bl) \
|
384
|
+
{ \
|
385
|
+
double_ext mh, ml; \
|
386
|
+
Mul12_ext(&mh,&ml,(ah),(bh)); \
|
387
|
+
ml += (ah)*(bl) + (al)*(bh); \
|
388
|
+
Add12_ext(prh,prl, mh,ml); \
|
389
|
+
}
|
390
|
+
|
391
|
+
#define FMA22_ext(prh,prl, ah,al, bh,bl, ch,cl) \
|
392
|
+
{ \
|
393
|
+
Mul22_ext(prh,prl, (ah),(al), (bh),(bl)); \
|
394
|
+
Add22_ext(prh,prl, ch,cl, *prh, *prl); \
|
395
|
+
}
|
396
|
+
|
397
|
+
|
398
|
+
|
399
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
400
|
+
|
401
|
+
|
402
|
+
|
403
|
+
|
404
|
+
|
405
|
+
|
406
|
+
#define Mul12_ext( prh,prl, a, b ) \
|
407
|
+
{ \
|
408
|
+
*prh = (a) * (b); \
|
409
|
+
*prl = _Asm_fms( 3/*_PC_NONE*/, (a), (b), *prh, 1 ); \
|
410
|
+
}
|
411
|
+
|
412
|
+
|
413
|
+
#if 0 /* transcription of Alexey's */
|
414
|
+
#define Mul22_ext( prh,prl, ah,al, bh,bl ) \
|
415
|
+
{ \
|
416
|
+
double_ext _t1,_t2,_t3; \
|
417
|
+
*prh = (ah) * (bh); \
|
418
|
+
_t1 = (ah)*(bl); \
|
419
|
+
_t2 = _Asm_fms( 3/*_PC_NONE*/, (ah), (bh), *prh, 1 ); \
|
420
|
+
_t3 = (al) * (bh) + _t1; \
|
421
|
+
*prl = (_t2 + _t3); \
|
422
|
+
}
|
423
|
+
#else
|
424
|
+
#define Mul22_ext( prh,prl, ah,al, bh,bl ) \
|
425
|
+
{ \
|
426
|
+
double_ext ph, pl; \
|
427
|
+
ph = (ah)*(bh); \
|
428
|
+
pl = _Asm_fms( 3/*_PC_NONE*/, ah, bh, ph, 1/*_SF1*/ ); \
|
429
|
+
pl = (ah)*(bl) + pl; \
|
430
|
+
pl = (al)*(bh) + pl; \
|
431
|
+
Add12_ext(prh,prl, ph,pl); \
|
432
|
+
}
|
433
|
+
#endif
|
434
|
+
|
435
|
+
/* res = a*b + c, assume |a*b| <= |c|
|
436
|
+
* res, a and b in X format
|
437
|
+
* c in L format
|
438
|
+
*/
|
439
|
+
|
440
|
+
#if 0
|
441
|
+
#define FMA22_ext(prh,prl, ah,al, bh,bl, ch,cl) \
|
442
|
+
{ \
|
443
|
+
Mul22_ext(prh,prl, (ah),(al), (bh),(bl)); \
|
444
|
+
Add22_ext(prh,prl, ch,cl, *prh, *prl); \
|
445
|
+
}
|
446
|
+
#else
|
447
|
+
#define FMA22_ext( prh,prl, ah,al, bh,bl, ch,cl) \
|
448
|
+
{ \
|
449
|
+
double_ext __xfmagxxx_r_hi__,__xfmagxxx_r_lo__, \
|
450
|
+
__xfmagxxx_t1__,__xfmagxxx_t2__, \
|
451
|
+
__xfmagxxx_t3__,__xfmagxxx_t4__; \
|
452
|
+
__xfmagxxx_r_hi__ = ah * bh + ch; \
|
453
|
+
__xfmagxxx_t1__ = al * bh + cl; \
|
454
|
+
__xfmagxxx_t2__ = __xfmagxxx_r_hi__ - ch; \
|
455
|
+
__xfmagxxx_t3__ = ah * bl + __xfmagxxx_t1__; \
|
456
|
+
__xfmagxxx_t4__ = _Asm_fms( 3/*_PC_NONE*/, ah, bh, __xfmagxxx_t2__, 1/*_SF1*/ ); \
|
457
|
+
__xfmagxxx_r_lo__ = (__xfmagxxx_t3__ + __xfmagxxx_t4__); \
|
458
|
+
*prh = __xfmagxxx_r_hi__; *prl = __xfmagxxx_r_lo__; \
|
459
|
+
}
|
460
|
+
#endif
|
461
|
+
|
462
|
+
#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
463
|
+
|
464
|
+
|
465
|
+
|
466
|
+
|
467
|
+
|
468
|
+
|
469
|
+
|
470
|
+
|
471
|
+
#define Div22_ext(prh,prl,xh,xl,yh,yl) \
|
472
|
+
{ \
|
473
|
+
double_ext ch,cl,uh,ul; \
|
474
|
+
ch = (xh)/(yh); \
|
475
|
+
Mul12_ext(&uh,&ul,ch,(yh)); \
|
476
|
+
cl = (xh)-uh; \
|
477
|
+
cl = cl - ul; \
|
478
|
+
cl = cl + (xl); \
|
479
|
+
cl = cl - ch*(yl); \
|
480
|
+
cl = cl / (yh); \
|
481
|
+
Add12(prh,prl, ch, cl) ; \
|
482
|
+
}
|
483
|
+
|
484
|
+
|
485
|
+
#define Add22_ext(prh,prl,xh,xl,yh,yl) \
|
486
|
+
do { \
|
487
|
+
double_ext _r,_s; \
|
488
|
+
_r = (xh)+(yh); \
|
489
|
+
_s = (xh)-_r; \
|
490
|
+
_s = _s + (yh); \
|
491
|
+
_s = _s + (yl); \
|
492
|
+
_s = _s + (xl); \
|
493
|
+
Add12_ext(prh,prl,_r,_s); \
|
494
|
+
} while(0)
|
495
|
+
|
496
|
+
#endif /* ifndef __DOUBLE_EXTENDED_H*/
|