intervals 0.3.56
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.txt +1 -0
- data/ext/crlibm/AUTHORS +2 -0
- data/ext/crlibm/COPYING +504 -0
- data/ext/crlibm/ChangeLog +80 -0
- data/ext/crlibm/INSTALL +182 -0
- data/ext/crlibm/Makefile.am +84 -0
- data/ext/crlibm/Makefile.in +530 -0
- data/ext/crlibm/NEWS +0 -0
- data/ext/crlibm/README +31 -0
- data/ext/crlibm/TODO +47 -0
- data/ext/crlibm/VERSION +1 -0
- data/ext/crlibm/aclocal.m4 +989 -0
- data/ext/crlibm/atan-itanium.c +846 -0
- data/ext/crlibm/atan-pentium.c +261 -0
- data/ext/crlibm/atan_accurate.c +244 -0
- data/ext/crlibm/atan_accurate.h +191 -0
- data/ext/crlibm/atan_fast.c +324 -0
- data/ext/crlibm/atan_fast.h +678 -0
- data/ext/crlibm/config.guess +1461 -0
- data/ext/crlibm/config.sub +1566 -0
- data/ext/crlibm/configure +7517 -0
- data/ext/crlibm/configure.ac +364 -0
- data/ext/crlibm/crlibm.h +125 -0
- data/ext/crlibm/crlibm_config.h +149 -0
- data/ext/crlibm/crlibm_config.h.in +148 -0
- data/ext/crlibm/crlibm_private.c +293 -0
- data/ext/crlibm/crlibm_private.h +658 -0
- data/ext/crlibm/csh_fast.c +631 -0
- data/ext/crlibm/csh_fast.h +771 -0
- data/ext/crlibm/double-extended.h +496 -0
- data/ext/crlibm/exp-td.c +962 -0
- data/ext/crlibm/exp-td.h +685 -0
- data/ext/crlibm/exp_accurate.c +197 -0
- data/ext/crlibm/exp_accurate.h +85 -0
- data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
- data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
- data/ext/crlibm/gappa/log-de.gappa +81 -0
- data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
- data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
- data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
- data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
- data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
- data/ext/crlibm/gappa/log-td.gappa +156 -0
- data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
- data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
- data/ext/crlibm/install-sh +269 -0
- data/ext/crlibm/log-de.c +431 -0
- data/ext/crlibm/log-de.h +732 -0
- data/ext/crlibm/log-td.c +852 -0
- data/ext/crlibm/log-td.h +819 -0
- data/ext/crlibm/log10-td.c +906 -0
- data/ext/crlibm/log10-td.h +823 -0
- data/ext/crlibm/log2-td.c +935 -0
- data/ext/crlibm/log2-td.h +821 -0
- data/ext/crlibm/maple/atan.mpl +359 -0
- data/ext/crlibm/maple/common-procedures.mpl +997 -0
- data/ext/crlibm/maple/csh.mpl +446 -0
- data/ext/crlibm/maple/double-extended.mpl +151 -0
- data/ext/crlibm/maple/exp-td.mpl +195 -0
- data/ext/crlibm/maple/log-de.mpl +243 -0
- data/ext/crlibm/maple/log-td.mpl +316 -0
- data/ext/crlibm/maple/log10-td.mpl +345 -0
- data/ext/crlibm/maple/log2-td.mpl +334 -0
- data/ext/crlibm/maple/trigo.mpl +728 -0
- data/ext/crlibm/maple/triple-double.mpl +58 -0
- data/ext/crlibm/missing +198 -0
- data/ext/crlibm/mkinstalldirs +40 -0
- data/ext/crlibm/rem_pio2_accurate.c +219 -0
- data/ext/crlibm/rem_pio2_accurate.h +53 -0
- data/ext/crlibm/scs_lib/AUTHORS +3 -0
- data/ext/crlibm/scs_lib/COPYING +504 -0
- data/ext/crlibm/scs_lib/ChangeLog +16 -0
- data/ext/crlibm/scs_lib/INSTALL +215 -0
- data/ext/crlibm/scs_lib/Makefile.am +18 -0
- data/ext/crlibm/scs_lib/Makefile.in +328 -0
- data/ext/crlibm/scs_lib/NEWS +0 -0
- data/ext/crlibm/scs_lib/README +9 -0
- data/ext/crlibm/scs_lib/TODO +4 -0
- data/ext/crlibm/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm/scs_lib/config.guess +1461 -0
- data/ext/crlibm/scs_lib/config.sub +1566 -0
- data/ext/crlibm/scs_lib/configure +6226 -0
- data/ext/crlibm/scs_lib/division_scs.c +110 -0
- data/ext/crlibm/scs_lib/double2scs.c +174 -0
- data/ext/crlibm/scs_lib/install-sh +269 -0
- data/ext/crlibm/scs_lib/missing +198 -0
- data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
- data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
- data/ext/crlibm/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm/scs_lib/print_scs.c +73 -0
- data/ext/crlibm/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm/scs_lib/scs.h +353 -0
- data/ext/crlibm/scs_lib/scs2double.c +391 -0
- data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm/scs_lib/scs_private.c +23 -0
- data/ext/crlibm/scs_lib/scs_private.h +133 -0
- data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
- data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm/stamp-h.in +1 -0
- data/ext/crlibm/tests/Makefile.am +43 -0
- data/ext/crlibm/tests/Makefile.in +396 -0
- data/ext/crlibm/tests/blind_test.c +148 -0
- data/ext/crlibm/tests/generate_test_vectors.c +258 -0
- data/ext/crlibm/tests/soak_test.c +334 -0
- data/ext/crlibm/tests/test_common.c +627 -0
- data/ext/crlibm/tests/test_common.h +28 -0
- data/ext/crlibm/tests/test_perf.c +570 -0
- data/ext/crlibm/tests/test_val.c +249 -0
- data/ext/crlibm/trigo_accurate.c +500 -0
- data/ext/crlibm/trigo_accurate.h +331 -0
- data/ext/crlibm/trigo_fast.c +1219 -0
- data/ext/crlibm/trigo_fast.h +639 -0
- data/ext/crlibm/triple-double.h +878 -0
- data/ext/extconf.rb +31 -0
- data/ext/fpu.c +107 -0
- data/ext/jamis-mod.rb +591 -0
- data/lib/fpu.rb +287 -0
- data/lib/interval.rb +1170 -0
- data/lib/intervals.rb +212 -0
- data/lib/struct_float.rb +133 -0
- data/test/data_atan.txt +360 -0
- data/test/data_cos.txt +346 -0
- data/test/data_cosh.txt +3322 -0
- data/test/data_exp.txt +3322 -0
- data/test/data_log.txt +141 -0
- data/test/data_sin.txt +140 -0
- data/test/data_sinh.txt +3322 -0
- data/test/data_tan.txt +342 -0
- metadata +186 -0
@@ -0,0 +1,496 @@
|
|
1
|
+
#ifndef __DOUBLE_EXTENDED_H
#define __DOUBLE_EXTENDED_H

/* Select the "double extended" working type for the target CPU.
   On IA32/AMD64 it is the x87 80-bit long double; fpu_control.h
   supplies the _FPU_* control-word macros used below to switch the
   FPU precision and rounding mode. */
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
#include <fpu_control.h>
typedef long double double_ext;
#endif
/* On Itanium, only with the Intel compiler: __fpreg is the FP register type. */
#if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)
typedef __fpreg double_ext;
#endif
|
11
|
+
|
12
|
+
|
13
|
+
/* For debugging */
/* Bit-level overlay of a long double: i[DE_EXP] holds the sign/exponent
   half-word, i[DE_MANTISSA_HI]/i[DE_MANTISSA_LO] the two 32-bit halves of
   the 64-bit mantissa.  NOTE(review): index values assume a little-endian
   x87 layout — confirm before porting. */
typedef union {
  int i[3];
  long double d;
} db_ext_number;

/* Word indices into db_ext_number.i (little-endian word order). */
#define DE_EXP 2
#define DE_MANTISSA_HI 1
#define DE_MANTISSA_LO 0

/* Debug aid: print label _s, then exponent and mantissa words of _y in hex. */
#define print_ext(_s, _y) {\
  db_ext_number _yy; _yy.d=_y; \
  printf("%s %04x %08x %08x \n",_s, 0xffff&_yy.i[DE_EXP], _yy.i[DE_MANTISSA_HI], _yy.i[DE_MANTISSA_LO]); \
}
|
28
|
+
|
29
|
+
/**************************************************************************************/
/*********************************Rounding tests***************************************/
/**************************************************************************************/

/* These test work by observing the bits of your double-extended after the 53rd.

   mask should be 7ff if you trust your 64 bits (hum)
                  7fe if you trust 63 (if you have proven that maxepsilon<2^(-63) )
                  7fc                62
                  7f8                61
                  7f0                60 etc
*/

/* Mask constants for rounding test */
/* Each mask keeps the 11 bits of the extended mantissa that sit below the
   53-bit double mantissa, minus the bits the caller cannot guarantee. */
#define ACCURATE_TO_64_BITS 0x7ff
#define ACCURATE_TO_63_BITS 0x7fe
#define ACCURATE_TO_62_BITS 0x7fc
#define ACCURATE_TO_61_BITS 0x7f8
#define ACCURATE_TO_60_BITS 0x7f0
#define ACCURATE_TO_59_BITS 0x7e0
|
51
|
+
|
52
|
+
|
53
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))

/* Precomputed x87 control words: double precision with each rounding
   direction, and the default (extended precision, round to nearest). */
static const unsigned short RN_Double = (_FPU_DEFAULT & ~_FPU_EXTENDED)|_FPU_DOUBLE;
static const unsigned short RN_DoubleDown = (_FPU_DEFAULT & ~_FPU_EXTENDED & ~_FPU_RC_NEAREST)|_FPU_DOUBLE | _FPU_RC_DOWN;
static const unsigned short RN_DoubleUp = (_FPU_DEFAULT & ~_FPU_EXTENDED & ~_FPU_RC_NEAREST)|_FPU_DOUBLE | _FPU_RC_UP;
static const unsigned short RN_DoubleExt = _FPU_DEFAULT;

/* Mode switches: load the corresponding control word into the FPU. */
#define DOUBLE_EXTENDED_MODE  _FPU_SETCW(RN_DoubleExt)
#define DOUBLE_UP_MODE        _FPU_SETCW(RN_DoubleUp)
#define DOUBLE_DOWN_MODE      _FPU_SETCW(RN_DoubleDown)
#define BACK_TO_DOUBLE_MODE   _FPU_SETCW(RN_Double)
|
64
|
+
|
65
|
+
|
66
|
+
/*
  Two rounding tests to the nearest. On Pentium 3, gcc3.3, the second
  is faster by 12 cycles (and also improves the worst-case time by 60
  cycles since it doesn't switch processor rounding mode in this
  case). However it uses a coarser error estimation.
*/

/* Ziv-style test: split y into a double yh and tail yl; if inflating the
   tail by rncst does not move yh, the double rounding is safe — return it.
   Otherwise restore extended mode so the accurate phase can run. */
#define DE_TEST_AND_RETURN_RN_ZIV(y,rncst) \
{ double yh, yl; \
  yh = (double) y; \
  yl = y-yh; \
  BACK_TO_DOUBLE_MODE; \
  if(yh==yh + yl*rncst) return yh; \
  DOUBLE_EXTENDED_MODE; \
}
|
81
|
+
|
82
|
+
|
83
|
+
/* Round-to-nearest test on the trailing mantissa bits (see mask comment
   above): if the bits below the double mantissa are not in the ambiguous
   patterns 0x3ff/0x400 (just below / just above a rounding boundary),
   the rounded double _yd is correct — return it. */
#define DE_TEST_AND_RETURN_RN(_y, _mask) \
{ \
  db_ext_number _z; double _yd; \
  int _lo; \
  _z.d = _y; \
  _yd = (double) _y; \
  _lo = _z.i[DE_MANTISSA_LO] &(_mask); \
  if((_lo!=(0x3ff&(_mask))) && (_lo!= (0x400&(_mask)))) { \
    BACK_TO_DOUBLE_MODE; \
    return _yd; \
  } \
}
|
95
|
+
|
96
|
+
|
97
|
+
/* Round-down test: convert _y to double with the FPU in round-down mode;
   the result is valid unless the trailing bits are all ones or all zeros
   (0xfff/0x000 under the mask), i.e. too close to a representable double. */
#define DE_TEST_AND_RETURN_RD(_y, _mask) \
{ \
  double _result; int _bits; \
  db_ext_number _z; \
  _z.d = _y; \
  DOUBLE_DOWN_MODE; \
  _bits = _z.i[DE_MANTISSA_LO] &(_mask); \
  _result = (double)(_y); \
  if( (_bits != (0xfff&(_mask))) && (_bits != (0x000&(_mask))) ) { \
    BACK_TO_DOUBLE_MODE; \
    return _result; \
  } \
  DOUBLE_EXTENDED_MODE; \
}
|
111
|
+
/* Round-up test: identical to DE_TEST_AND_RETURN_RD except the conversion
   is performed with the FPU in round-up mode. */
#define DE_TEST_AND_RETURN_RU(_y, _mask) \
{ \
  double _result; int _bits; \
  db_ext_number _z; \
  _z.d = _y; \
  DOUBLE_UP_MODE; \
  _bits = _z.i[DE_MANTISSA_LO] &(_mask); \
  _result = (double)(_y); \
  if( (_bits != (0xfff&(_mask))) && (_bits != (0x000&(_mask))) ) { \
    BACK_TO_DOUBLE_MODE; \
    return _result; \
  } \
  DOUBLE_EXTENDED_MODE; \
}
|
125
|
+
|
126
|
+
|
127
|
+
/* Use this one if you want a final computation step to overlap with
   the rounding test. Examples: multiplication by a sign or by a power of 2 */
/* Same ambiguity test as DE_TEST_AND_RETURN_RN, but the tested value
   (_ytest) and the returned value (_yreturn) may differ, letting the
   caller fold a sign/scaling into the return expression. */
#define DE_TEST_AND_RETURN_RN2(_ytest, _yreturn, _mask) \
{ \
  db_ext_number _z; double _y_return_d; \
  int _bits; \
  _z.d = _ytest; \
  _y_return_d = (double) (_yreturn); \
  _bits = _z.i[DE_MANTISSA_LO] &(_mask); \
  if((_bits!=(0x3ff&(_mask))) && (_bits!= (0x400&(_mask)))) {\
    BACK_TO_DOUBLE_MODE; \
    return _y_return_d; \
  } \
}
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
/* Slow macros with two changes of precision, but who cares, they are
   used at the end of the second step */
/* Return (_rh+_rl) rounded toward -infinity: switch the control word to
   double/round-down, add, then restore double/round-to-nearest. */
#define RETURN_SUM_ROUNDED_DOWN(_rh, _rl) {\
  double _result; \
  DOUBLE_DOWN_MODE; \
  _result = (_rh+_rl); \
  BACK_TO_DOUBLE_MODE; \
  return _result; \
}

/* Return (_rh+_rl) rounded toward +infinity (same scheme, round-up mode). */
#define RETURN_SUM_ROUNDED_UP(_rh, _rl) {\
  double _result; \
  DOUBLE_UP_MODE; \
  _result = (_rh+_rl); \
  BACK_TO_DOUBLE_MODE; \
  return _result; \
}
|
163
|
+
|
164
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */

/* This branch targets IA64 only; reject any other architecture. */
#if !defined(CRLIBM_TYPECPU_ITANIUM)
#error "This file should be compiled only for IA32 or IA64 architecture "
#endif

/* TODO Add what it takes to compile under HP-UX */
/* The _Asm_* intrinsics below are Intel-compiler specific. */
#if !defined(__INTEL_COMPILER)
#error "Use icc, version 8.1 or higher to compile for IA64 architecture"
#endif

/* IA64 has per-instruction precision control (status fields), so no
   global FPU mode switches are needed: these are no-ops here. */
#define DOUBLE_EXTENDED_MODE {}
#define BACK_TO_DOUBLE_MODE {}
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
/* IA64 round-to-nearest test: _Asm_fma with format _FR_D rounds _y to a
   double in one instruction; _Asm_getf extracts the full 64-bit mantissa.
   Return the rounded double unless the trailing bits hit the ambiguous
   0x3ff/0x400 patterns (then the accurate phase must run).
   __builtin_expect(..., 1+1==2) marks the early return as the likely path. */
#define DE_TEST_AND_RETURN_RN(_y, _mask) \
{ uint64_t _mantissa, _bits; double _yd; \
  _yd = _Asm_fma(2/*_FR_D*/, 1.0, _y, 0.0, 0/*SF0*/); \
  _mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  if(__builtin_expect( \
     (_bits!=(0x3ff&(_mask))) && (_bits != (0x400&(_mask))), \
     1+1==2)) \
    return _yd; \
}
|
197
|
+
|
198
|
+
|
199
|
+
/* Slower by 5 cycles as of 2005... Let us keep it, you never know */
/* Ziv-style test (see the x86 variant): no mode switch is needed on IA64. */
#define DE_TEST_AND_RETURN_RN_ZIV(y,rncst) \
{ double yh, yl; \
  yh = (double) y; \
  yl = y-yh; \
  if(__builtin_expect(yh == yh + yl*rncst, 1+1==2)) return yh; \
}
|
206
|
+
|
207
|
+
/* Use this one if you want a final computation step to overlap with
   the rounding test. Examples: multiplication by a sign or by a power of 2 */
/* Same test as DE_TEST_AND_RETURN_RN but the value tested (_ytest) and the
   value returned (_yreturn) may differ. */
#define DE_TEST_AND_RETURN_RN2(_ytest, _yreturn, _mask) \
{ uint64_t _mantissa, _bits; \
  _mantissa = _Asm_getf(4/*_FR_SIG*/, _ytest); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  if(__builtin_expect( \
     (_bits!=(0x3ff&(_mask))) && (_bits != (0x400&(_mask))), \
     1+1==2)) \
    return _yreturn; \
}
|
219
|
+
|
220
|
+
|
221
|
+
/* IA64 round-down: negate, round with status field SF3, negate back
   (rounding -_y with SF3 and negating yields _y rounded the other way;
   SF3 is presumably configured for directed rounding elsewhere — TODO confirm).
   Ambiguous trailing-bit patterns here are all-zeros/all-ones (0x000/0x7ff). */
#define DE_TEST_AND_RETURN_RD(_y, _mask) \
{ uint64_t _mantissa, _bits; double _yd; \
  _yd = _Asm_fma(2/*_FR_D*/, -1.0, _y, 0.0, 3/*SF3*/); \
  _mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  if(__builtin_expect( \
     (_bits!=(0x000&(_mask))) && (_bits != (0x7ff&(_mask))), \
     1+1==2)) \
    return -_yd; \
}
|
231
|
+
|
232
|
+
/* IA64 round-up: round _y directly with status field SF3 (see the RD
   variant above for the SF3 assumption). */
#define DE_TEST_AND_RETURN_RU(_y, _mask) \
{ uint64_t _mantissa, _bits; double _yd; \
  _yd = _Asm_fma(2/*_FR_D*/, 1.0, _y, 0.0, 3/*SF3*/); \
  _mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  if(__builtin_expect( \
     (_bits!=(0x000&(_mask))) && (_bits != (0x7ff&(_mask))), \
     1+1==2)) \
    return _yd; \
}
|
242
|
+
|
243
|
+
/* Round-down sum via one fma with status field SF3: negating operands and
   the result flips the rounding direction, so -fma(-_rh, -_rl) rounded
   with SF3 is (_rh+_rl) rounded toward -infinity. */
#define RETURN_SUM_ROUNDED_DOWN(_rh, _rl) \
  return -_Asm_fma(2/*_FR_D*/, -1.0, _rh, -_rl, 3/*SF3*/);

/* Round-up sum: direct fma with SF3. */
#define RETURN_SUM_ROUNDED_UP(_rh, _rl) \
  return _Asm_fma(2/*_FR_D*/, 1.0, _rh, _rl, 3/*SF3*/);
|
248
|
+
|
249
|
+
|
250
|
+
#if 0

/* This test doesn'use SF2 and SF3 Kept as a model for Pentium implementation, to erase afterwards */

/* Dead code (disabled by #if 0): emulate round-up by inspecting how the
   round-to-nearest conversion went and nudging the double's bit pattern
   up or down one ulp accordingly. */
#define DE_TEST_AND_RETURN_RU(_y, _mask) \
{ \
  db_ext_number _z; double _yd; \
  unsigned long int _mantissa, _y_double, _isNegative ,_wasRoundedUp, _bits; \
  _yd=(double) _y; \
  _mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
  _y_double = _Asm_getf(2/*_FR_D*/, _yd); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  _wasRoundedUp = ((_mantissa >>11) & 1) != (_y_double & 1); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  _isNegative = _y_double >> 63; \
  if(_isNegative) { \
    if(_wasRoundedUp) { /* RN was RD */ \
      if( _bits != (0x7ff&(_mask)) ) { \
        _y_double--; \
        return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
      } /* else launch accurate phase */ \
    } \
    else{ /* RN was RU, so need to check */ \
      if( _bits != (0x000&(_mask)) ) { \
        return _yd; \
      } /* else launch accurate phase */ \
    } \
  } \
  else{ /* Positive number */ \
    if(_wasRoundedUp) { /* RN was RU */ \
      if( _bits != (0x7ff&(_mask)) ) { \
        return _yd; \
      } /* else launch accurate phase */ \
    } \
    else{ /* RN was RD, */ \
      if( _bits != (0x000&(_mask)) ) { \
        _y_double++; /* beware, does not work for -0 */ \
        return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
      } /* else launch accurate phase */ \
    } \
  } \
}

/* Dead code: round-down counterpart of the macro above. */
#define DE_TEST_AND_RETURN_RD(_y, _mask) \
{ \
  db_ext_number _z; double _yd; \
  unsigned long int _mantissa, _y_double, _isNegative ,_wasRoundedUp, _bits; \
  _yd=(double) _y; \
  _mantissa = _Asm_getf(4/*_FR_SIG*/, _y); \
  _y_double = _Asm_getf(2/*_FR_D*/, _yd); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  _wasRoundedUp = ((_mantissa >>11) & 1) != (_y_double & 1); \
  _bits = _mantissa & (0x7ff&(_mask)); \
  _isNegative = _y_double >> 63; \
  if(_isNegative) { \
    if(_wasRoundedUp) { /* RN was RD */ \
      if( _bits != (0x7ff&(_mask)) ) { \
        return (double) _y; \
      } /* else launch accurate phase */ \
    } \
    else{ /* RN was RU */ \
      if( _bits != (0x000&(_mask)) ) { \
        _y_double++; \
        return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
      } /* else launch accurate phase */ \
    } \
  } \
  else{ /* Positive number */ \
    if(_wasRoundedUp) { /* RN was RU */ \
      if( _bits != (0x7ff&(_mask)) ) { \
        _y_double--; \
        return (double) _Asm_setf(2/*_FR_D*/, _y_double); \
      } /* else launch accurate phase */ \
    } \
    else{ /* RN was RD, */ \
      if( _bits != (0x000&(_mask)) ) { \
        return (double) _y; \
      } /* else launch accurate phase */ \
    } \
  } \
}

#endif /* 0 */

#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
|
346
|
+
|
347
|
+
/**************************************************************************************/
/************************Double double-extended arithmetic*****************************/
/**************************************************************************************/

/* Fast two-sum: *prh + *prl == a + b exactly, with *prh = round(a+b).
   NOTE(review): this is the fast (Dekker) form, which is exact only when
   |a| >= |b| — callers must guarantee the ordering. */
#define Add12_ext(prh, prl, a, b) \
{ \
  double_ext _z, _a, _b; \
  _a = a; _b = b; \
  *prh = _a + _b; \
  _z = *prh - _a; \
  *prl = _b - _z; \
}
|
361
|
+
|
362
|
+
|
363
|
+
|
364
|
+
|
365
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
/* Exact product (Dekker/Veltkamp): *prh + *prl == u*v exactly.
   Each operand is split into two 32-bit halves with the constant 2^32+1,
   then the rounding error of *prh = u*v is reconstructed from the four
   partial products. */
#define Mul12_ext(prh,prl,u,v) \
{ \
  const double_ext _c = 4294967297.L; /* 2^32 +1 */ \
  double_ext _up, _u1, _u2, _vp, _v1, _v2; \
  double_ext _u =u, _v=v; \
  \
  _up = _u*_c; _vp = _v*_c; \
  _u1 = (_u-_up)+_up; _v1 = (_v-_vp)+_vp; \
  _u2 = _u-_u1; _v2 = _v-_v1; \
  \
  *prh = _u*_v; \
  *prl = _u1*_v1 - *prh; \
  *prl = *prl + _u1*_v2; \
  *prl = *prl + _u2*_v1; \
  *prl = *prl + _u2*_v2; \
}

/* Product of two double-double-extended numbers:
   (ah+al)*(bh+bl), dropping the al*bl term, renormalized by Add12_ext. */
#define Mul22_ext(prh,prl, ah,al, bh,bl) \
{ \
  double_ext mh, ml; \
  Mul12_ext(&mh,&ml,(ah),(bh)); \
  ml += (ah)*(bl) + (al)*(bh); \
  Add12_ext(prh,prl, mh,ml); \
}

/* Fused multiply-add on double-double-extended operands: (a*b) + c. */
#define FMA22_ext(prh,prl, ah,al, bh,bl, ch,cl) \
{ \
  Mul22_ext(prh,prl, (ah),(al), (bh),(bl)); \
  Add22_ext(prh,prl, ch,cl, *prh, *prl); \
}
|
396
|
+
|
397
|
+
|
398
|
+
|
399
|
+
#else /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */

/* Exact product via hardware FMA: the fused multiply-subtract
   a*b - *prh recovers the rounding error of the product in one
   instruction, so no Veltkamp splitting is needed on IA64. */
#define Mul12_ext( prh,prl, a, b ) \
{ \
  *prh = (a) * (b); \
  *prl = _Asm_fms( 3/*_PC_NONE*/, (a), (b), *prh, 1 ); \
}
|
411
|
+
|
412
|
+
|
413
|
+
#if 0 /* transcription of Alexey's */
/* Disabled variant: same cross products but no final renormalization. */
#define Mul22_ext( prh,prl, ah,al, bh,bl ) \
{ \
  double_ext _t1,_t2,_t3; \
  *prh = (ah) * (bh); \
  _t1 = (ah)*(bl); \
  _t2 = _Asm_fms( 3/*_PC_NONE*/, (ah), (bh), *prh, 1 ); \
  _t3 = (al) * (bh) + _t1; \
  *prl = (_t2 + _t3); \
}
#else
/* Product of two double-double-extended numbers: exact high product via
   FMS, cross terms accumulated into the low part, then renormalized. */
#define Mul22_ext( prh,prl, ah,al, bh,bl ) \
{ \
  double_ext ph, pl; \
  ph = (ah)*(bh); \
  pl = _Asm_fms( 3/*_PC_NONE*/, ah, bh, ph, 1/*_SF1*/ ); \
  pl = (ah)*(bl) + pl; \
  pl = (al)*(bh) + pl; \
  Add12_ext(prh,prl, ph,pl); \
}
#endif
|
434
|
+
|
435
|
+
/* res = a*b + c, assume |a*b| <= |c|
 * res, a and b in X format
 * c in L format
 */

#if 0
/* Disabled generic composition (Mul22 then Add22). */
#define FMA22_ext(prh,prl, ah,al, bh,bl, ch,cl) \
{ \
  Mul22_ext(prh,prl, (ah),(al), (bh),(bl)); \
  Add22_ext(prh,prl, ch,cl, *prh, *prl); \
}
#else
/* Hand-scheduled FMA22 for IA64: the high part is one fma (ah*bh+ch);
   the low part gathers the cross terms plus the exact error of the high
   fma, recovered with _Asm_fms.  Valid under the |a*b| <= |c| assumption
   stated above. */
#define FMA22_ext( prh,prl, ah,al, bh,bl, ch,cl) \
{ \
  double_ext __xfmagxxx_r_hi__,__xfmagxxx_r_lo__, \
    __xfmagxxx_t1__,__xfmagxxx_t2__, \
    __xfmagxxx_t3__,__xfmagxxx_t4__; \
  __xfmagxxx_r_hi__ = ah * bh + ch; \
  __xfmagxxx_t1__ = al * bh + cl; \
  __xfmagxxx_t2__ = __xfmagxxx_r_hi__ - ch; \
  __xfmagxxx_t3__ = ah * bl + __xfmagxxx_t1__; \
  __xfmagxxx_t4__ = _Asm_fms( 3/*_PC_NONE*/, ah, bh, __xfmagxxx_t2__, 1/*_SF1*/ ); \
  __xfmagxxx_r_lo__ = (__xfmagxxx_t3__ + __xfmagxxx_t4__); \
  *prh = __xfmagxxx_r_hi__; *prl = __xfmagxxx_r_lo__; \
}
#endif

#endif /* defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64) */
|
463
|
+
|
464
|
+
|
465
|
+
|
466
|
+
|
467
|
+
|
468
|
+
|
469
|
+
|
470
|
+
|
471
|
+
/* Division of double-double-extended numbers: *prh + *prl ~= (xh+xl)/(yh+yl).
   Classic scheme: ch = xh/yh as first quotient approximation, then the
   residual x - ch*y is reconstructed exactly (via Mul12_ext) and divided
   by yh to obtain the correction cl; Add12_ext renormalizes the pair.
   FIX: the final renormalization previously called Add12 (the plain-double,
   value-semantics two-sum from crlibm_private.h) with the POINTER arguments
   prh/prl, which both mismatches the calling convention used throughout this
   file and would round through double instead of double_ext.  Use the
   pointer-based extended-precision Add12_ext defined above, as every other
   macro in this header does. */
#define Div22_ext(prh,prl,xh,xl,yh,yl) \
{ \
  double_ext ch,cl,uh,ul; \
  ch = (xh)/(yh); \
  Mul12_ext(&uh,&ul,ch,(yh)); \
  cl = (xh)-uh; \
  cl = cl - ul; \
  cl = cl + (xl); \
  cl = cl - ch*(yl); \
  cl = cl / (yh); \
  Add12_ext(prh,prl, ch, cl); \
}
|
483
|
+
|
484
|
+
|
485
|
+
/* Sum of two double-double-extended numbers: *prh + *prl ~= (xh+xl)+(yh+yl).
   Accumulates the low-order contributions into _s, then renormalizes.
   NOTE(review): the _s recurrence assumes |xh| >= |yh| for full accuracy —
   callers appear responsible for the ordering; confirm against call sites. */
#define Add22_ext(prh,prl,xh,xl,yh,yl) \
do { \
  double_ext _r,_s; \
  _r = (xh)+(yh); \
  _s = (xh)-_r; \
  _s = _s + (yh); \
  _s = _s + (yl); \
  _s = _s + (xl); \
  Add12_ext(prh,prl,_r,_s); \
} while(0)

#endif /* ifndef __DOUBLE_EXTENDED_H*/
|