intervals 0.3.56
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.txt +1 -0
- data/ext/crlibm/AUTHORS +2 -0
- data/ext/crlibm/COPYING +504 -0
- data/ext/crlibm/ChangeLog +80 -0
- data/ext/crlibm/INSTALL +182 -0
- data/ext/crlibm/Makefile.am +84 -0
- data/ext/crlibm/Makefile.in +530 -0
- data/ext/crlibm/NEWS +0 -0
- data/ext/crlibm/README +31 -0
- data/ext/crlibm/TODO +47 -0
- data/ext/crlibm/VERSION +1 -0
- data/ext/crlibm/aclocal.m4 +989 -0
- data/ext/crlibm/atan-itanium.c +846 -0
- data/ext/crlibm/atan-pentium.c +261 -0
- data/ext/crlibm/atan_accurate.c +244 -0
- data/ext/crlibm/atan_accurate.h +191 -0
- data/ext/crlibm/atan_fast.c +324 -0
- data/ext/crlibm/atan_fast.h +678 -0
- data/ext/crlibm/config.guess +1461 -0
- data/ext/crlibm/config.sub +1566 -0
- data/ext/crlibm/configure +7517 -0
- data/ext/crlibm/configure.ac +364 -0
- data/ext/crlibm/crlibm.h +125 -0
- data/ext/crlibm/crlibm_config.h +149 -0
- data/ext/crlibm/crlibm_config.h.in +148 -0
- data/ext/crlibm/crlibm_private.c +293 -0
- data/ext/crlibm/crlibm_private.h +658 -0
- data/ext/crlibm/csh_fast.c +631 -0
- data/ext/crlibm/csh_fast.h +771 -0
- data/ext/crlibm/double-extended.h +496 -0
- data/ext/crlibm/exp-td.c +962 -0
- data/ext/crlibm/exp-td.h +685 -0
- data/ext/crlibm/exp_accurate.c +197 -0
- data/ext/crlibm/exp_accurate.h +85 -0
- data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
- data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
- data/ext/crlibm/gappa/log-de.gappa +81 -0
- data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
- data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
- data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
- data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
- data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
- data/ext/crlibm/gappa/log-td.gappa +156 -0
- data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
- data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
- data/ext/crlibm/install-sh +269 -0
- data/ext/crlibm/log-de.c +431 -0
- data/ext/crlibm/log-de.h +732 -0
- data/ext/crlibm/log-td.c +852 -0
- data/ext/crlibm/log-td.h +819 -0
- data/ext/crlibm/log10-td.c +906 -0
- data/ext/crlibm/log10-td.h +823 -0
- data/ext/crlibm/log2-td.c +935 -0
- data/ext/crlibm/log2-td.h +821 -0
- data/ext/crlibm/maple/atan.mpl +359 -0
- data/ext/crlibm/maple/common-procedures.mpl +997 -0
- data/ext/crlibm/maple/csh.mpl +446 -0
- data/ext/crlibm/maple/double-extended.mpl +151 -0
- data/ext/crlibm/maple/exp-td.mpl +195 -0
- data/ext/crlibm/maple/log-de.mpl +243 -0
- data/ext/crlibm/maple/log-td.mpl +316 -0
- data/ext/crlibm/maple/log10-td.mpl +345 -0
- data/ext/crlibm/maple/log2-td.mpl +334 -0
- data/ext/crlibm/maple/trigo.mpl +728 -0
- data/ext/crlibm/maple/triple-double.mpl +58 -0
- data/ext/crlibm/missing +198 -0
- data/ext/crlibm/mkinstalldirs +40 -0
- data/ext/crlibm/rem_pio2_accurate.c +219 -0
- data/ext/crlibm/rem_pio2_accurate.h +53 -0
- data/ext/crlibm/scs_lib/AUTHORS +3 -0
- data/ext/crlibm/scs_lib/COPYING +504 -0
- data/ext/crlibm/scs_lib/ChangeLog +16 -0
- data/ext/crlibm/scs_lib/INSTALL +215 -0
- data/ext/crlibm/scs_lib/Makefile.am +18 -0
- data/ext/crlibm/scs_lib/Makefile.in +328 -0
- data/ext/crlibm/scs_lib/NEWS +0 -0
- data/ext/crlibm/scs_lib/README +9 -0
- data/ext/crlibm/scs_lib/TODO +4 -0
- data/ext/crlibm/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm/scs_lib/config.guess +1461 -0
- data/ext/crlibm/scs_lib/config.sub +1566 -0
- data/ext/crlibm/scs_lib/configure +6226 -0
- data/ext/crlibm/scs_lib/division_scs.c +110 -0
- data/ext/crlibm/scs_lib/double2scs.c +174 -0
- data/ext/crlibm/scs_lib/install-sh +269 -0
- data/ext/crlibm/scs_lib/missing +198 -0
- data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
- data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
- data/ext/crlibm/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm/scs_lib/print_scs.c +73 -0
- data/ext/crlibm/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm/scs_lib/scs.h +353 -0
- data/ext/crlibm/scs_lib/scs2double.c +391 -0
- data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm/scs_lib/scs_private.c +23 -0
- data/ext/crlibm/scs_lib/scs_private.h +133 -0
- data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
- data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm/stamp-h.in +1 -0
- data/ext/crlibm/tests/Makefile.am +43 -0
- data/ext/crlibm/tests/Makefile.in +396 -0
- data/ext/crlibm/tests/blind_test.c +148 -0
- data/ext/crlibm/tests/generate_test_vectors.c +258 -0
- data/ext/crlibm/tests/soak_test.c +334 -0
- data/ext/crlibm/tests/test_common.c +627 -0
- data/ext/crlibm/tests/test_common.h +28 -0
- data/ext/crlibm/tests/test_perf.c +570 -0
- data/ext/crlibm/tests/test_val.c +249 -0
- data/ext/crlibm/trigo_accurate.c +500 -0
- data/ext/crlibm/trigo_accurate.h +331 -0
- data/ext/crlibm/trigo_fast.c +1219 -0
- data/ext/crlibm/trigo_fast.h +639 -0
- data/ext/crlibm/triple-double.h +878 -0
- data/ext/extconf.rb +31 -0
- data/ext/fpu.c +107 -0
- data/ext/jamis-mod.rb +591 -0
- data/lib/fpu.rb +287 -0
- data/lib/interval.rb +1170 -0
- data/lib/intervals.rb +212 -0
- data/lib/struct_float.rb +133 -0
- data/test/data_atan.txt +360 -0
- data/test/data_cos.txt +346 -0
- data/test/data_cosh.txt +3322 -0
- data/test/data_exp.txt +3322 -0
- data/test/data_log.txt +141 -0
- data/test/data_sin.txt +140 -0
- data/test/data_sinh.txt +3322 -0
- data/test/data_tan.txt +342 -0
- metadata +186 -0
@@ -0,0 +1,658 @@
|
|
1
|
+
/*
|
2
|
+
* crlibm_private.h
|
3
|
+
*
|
4
|
+
* This file contains useful tools and data for the crlibm functions.
|
5
|
+
*
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef CRLIBM_PRIVATE_H
|
9
|
+
#define CRLIBM_PRIVATE_H 1
|
10
|
+
|
11
|
+
#include "scs_lib/scs.h"
|
12
|
+
#include "scs_lib/scs_private.h"
|
13
|
+
|
14
|
+
#ifdef HAVE_CONFIG_H
|
15
|
+
#include "crlibm_config.h"
|
16
|
+
#endif
|
17
|
+
/* otherwise CMake is used, and defines all the useful variables using -D switch */
|
18
|
+
|
19
|
+
#ifdef HAVE_INTTYPES_H
|
20
|
+
#include <inttypes.h>
|
21
|
+
#endif
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
|
26
|
+
# ifndef CRLIBM_TYPEOS_BSD
|
27
|
+
# include <fpu_control.h>
|
28
|
+
# ifndef _FPU_SETCW
|
29
|
+
# define _FPU_SETCW(cw) __asm__ ("fldcw %0" : : "m" (*&cw))
|
30
|
+
# endif
|
31
|
+
# ifndef _FPU_GETCW
|
32
|
+
# define _FPU_GETCW(cw) __asm__ ("fnstcw %0" : "=m" (*&cw))
|
33
|
+
# endif
|
34
|
+
# endif
|
35
|
+
#endif
|
36
|
+
|
37
|
+
/* 64 bit arithmetic may be standardised, but people still do what they want */
|
38
|
+
#ifdef HAVE_INTTYPES_H
|
39
|
+
#define ULL(bits) 0x##bits##uLL
|
40
|
+
#elif defined(_WIN32)
|
41
|
+
/* Windows garbage there */
|
42
|
+
typedef long long int int64_t;
|
43
|
+
typedef unsigned long long int uint64_t;
|
44
|
+
#define ULL(bits) 0x##bits##i64
|
45
|
+
/* Default, hoping it works, hopefully less and less relevant */
|
46
|
+
#else
|
47
|
+
typedef long long int int64_t;
|
48
|
+
typedef unsigned long long int uint64_t;
|
49
|
+
#define ULL(bits) 0x##bits##uLL
|
50
|
+
#endif
|
51
|
+
|
52
|
+
#ifndef SCS_DEF_INT64
|
53
|
+
#define SCS_DEF_INT64
|
54
|
+
#ifdef CRLIBM_TYPEOS_HPUX
|
55
|
+
#ifndef __LP64__ /* To solve the problem with 64 bits integer on HPPA */
|
56
|
+
typedef long long int64_t;
|
57
|
+
typedef unsigned long long uint64_t;
|
58
|
+
#define ULL(bits) 0x##bits##uLL
|
59
|
+
#endif
|
60
|
+
#endif
|
61
|
+
#endif
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
/* The Add22 and Add22Cond functions, as well as double-double
|
67
|
+
multiplications of the Dekker family may be either defined as
|
68
|
+
functions, or as #defines. Which one is better depends on the
|
69
|
+
processor/compiler/OS. As #define has to be used with more care (not
|
70
|
+
type-safe), the two following variables should be set to 1 in the
|
71
|
+
development/debugging phase, until no type warning remains.
|
72
|
+
|
73
|
+
*/
|
74
|
+
|
75
|
+
#define ADD22_AS_FUNCTIONS 0
|
76
|
+
#define DEKKER_AS_FUNCTIONS 0
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
/* setting the following variable adds variables and code for
|
81
|
+
monitoring the performance.
|
82
|
+
Note that sometimes only round to nearest is instrumented */
|
83
|
+
#define EVAL_PERF 1
|
84
|
+
|
85
|
+
|
86
|
+
#if EVAL_PERF==1
|
87
|
+
/* counter of calls to the second step (accurate step) */
|
88
|
+
extern int crlibm_second_step_taken;
|
89
|
+
#endif
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
/* The prototypes of the second steps */
|
94
|
+
extern void exp_SC(scs_ptr res_scs, double x);
|
95
|
+
extern double scs_exp_rn(double);
|
96
|
+
extern double scs_exp_ru(double);
|
97
|
+
extern double scs_exp_rd(double);
|
98
|
+
|
99
|
+
extern void scs_log(scs_ptr,db_number, int);
|
100
|
+
|
101
|
+
extern double scs_atan_rn(double);
|
102
|
+
extern double scs_atan_rd(double);
|
103
|
+
extern double scs_atan_ru(double);
|
104
|
+
|
105
|
+
extern double scs_sin_rn(double);
|
106
|
+
extern double scs_sin_ru(double);
|
107
|
+
extern double scs_sin_rd(double);
|
108
|
+
extern double scs_sin_rz(double);
|
109
|
+
extern double scs_cos_rn(double);
|
110
|
+
extern double scs_cos_ru(double);
|
111
|
+
extern double scs_cos_rd(double);
|
112
|
+
extern double scs_cos_rz(double);
|
113
|
+
extern double scs_tan_rn(double);
|
114
|
+
extern double scs_tan_rd(double);
|
115
|
+
extern double scs_tan_ru(double);
|
116
|
+
extern double scs_tan_rz(double);
|
117
|
+
|
118
|
+
extern int rem_pio2_scs(scs_ptr, scs_ptr);
|
119
|
+
|
120
|
+
/*
 * DOUBLE2INT(_i, _d): _i = _d rounded to an integer.
 *
 * The constant added is 2^52 + 2^51 (6755399441055744.0). After the
 * addition the integer part of _d sits in the low bits of the mantissa, so
 * the low word _t.i[LO] of the sum is the rounded value. The rounding is
 * whatever the floating-point addition applies (round to nearest in the
 * default mode).
 *
 * NOTE(review): presumably db_number.i[] holds two 32-bit words (as the
 * DB_ONE initializer suggests), so the result is only meaningful when the
 * rounded value fits in 32 bits -- confirm against scs_lib/scs.h.
 *
 * _d is parenthesized so that an argument such as `c ? x : y` is taken as a
 * whole before the constant is added.
 */
#define DOUBLE2INT(_i, _d)                 \
  {db_number _t;                           \
   _t.d = ((_d)+6755399441055744.0);       \
   _i = _t.i[LO];}
|
128
|
+
|
129
|
+
|
130
|
+
/*
 * DOUBLE2LONGINT(_i, _d): same idea as DOUBLE2INT (add 2^52 + 2^51 and read
 * the mantissa bits), but the low 51 bits of the 64-bit pattern are kept and
 * sign-extended by hand into _i.
 * Beware: works only for |_i| < 2^51 - 1.
 *
 * The argument is copied into a local first, so it is evaluated exactly once
 * and an expression such as `c ? x : y` is taken as a whole.
 */
#define DOUBLE2LONGINT(_i, _d)                                          \
  {                                                                     \
    db_number _t;                                                       \
    double _dv = (_d);                                                  \
    _t.d = (_dv+6755399441055744.0);                                    \
    if (_dv >= 0)  /* non-negative: upper bits stay clear */            \
      _i = _t.l & ULL(0007FFFFFFFFFFFF);                                \
    else           /* negative: sign extend */                          \
      _i = (_t.l & ULL(0007FFFFFFFFFFFF)) |  (ULL(FFF8000000000000));   \
  }
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
/* Macros for the rounding tests in directed modes */
|
146
|
+
/* After Evgeny Gvozdev pointed out a bug in the rounding procedures I
|
147
|
+
decided to centralize them here
|
148
|
+
|
149
|
+
Note that these tests launch the accurate phase when yl=0, in
|
150
|
+
particular in the exceptional cases when the image of a double is a
|
151
|
+
double. See the chapter about the log for an example
|
152
|
+
|
153
|
+
*/
|
154
|
+
|
155
|
+
|
156
|
+
#define TEST_AND_RETURN_RU(__yh__, __yl__, __eps__) \
|
157
|
+
{ \
|
158
|
+
db_number yh, yl, u53; int yh_neg, yl_neg; \
|
159
|
+
yh.d = __yh__; yl.d = __yl__; \
|
160
|
+
yh_neg = (yh.i[HI] & 0x80000000); \
|
161
|
+
yl_neg = (yl.i[HI] & 0x80000000); \
|
162
|
+
yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
163
|
+
yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
164
|
+
u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
|
165
|
+
if(yl.d > __eps__ * u53.d){ \
|
166
|
+
if(!yl_neg) { /* The case yl==0 is filtered by the above test*/ \
|
167
|
+
/* return next up */ \
|
168
|
+
yh.d = __yh__; \
|
169
|
+
if(yh_neg) yh.l--; else yh.l++; /* Beware: fails for zero */ \
|
170
|
+
return yh.d ; \
|
171
|
+
} \
|
172
|
+
else return __yh__; \
|
173
|
+
} \
|
174
|
+
}
|
175
|
+
|
176
|
+
|
177
|
+
#define TEST_AND_RETURN_RD(__yh__, __yl__, __eps__) \
|
178
|
+
{ \
|
179
|
+
db_number yh, yl, u53; int yh_neg, yl_neg; \
|
180
|
+
yh.d = __yh__; yl.d = __yl__; \
|
181
|
+
yh_neg = (yh.i[HI] & 0x80000000); \
|
182
|
+
yl_neg = (yl.i[HI] & 0x80000000); \
|
183
|
+
yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
184
|
+
yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
185
|
+
u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
|
186
|
+
if(yl.d > __eps__ * u53.d){ \
|
187
|
+
if(yl_neg) { /* The case yl==0 is filtered by the above test*/ \
|
188
|
+
/* return next down */ \
|
189
|
+
yh.d = __yh__; \
|
190
|
+
if(yh_neg) yh.l++; else yh.l--; /* Beware: fails for zero */ \
|
191
|
+
return yh.d ; \
|
192
|
+
} \
|
193
|
+
else return __yh__; \
|
194
|
+
} \
|
195
|
+
}
|
196
|
+
|
197
|
+
|
198
|
+
|
199
|
+
#define TEST_AND_RETURN_RZ(__yh__, __yl__, __eps__) \
|
200
|
+
{ \
|
201
|
+
db_number yh, yl, u53; int yh_neg, yl_neg; \
|
202
|
+
yh.d = __yh__; yl.d = __yl__; \
|
203
|
+
yh_neg = (yh.i[HI] & 0x80000000); \
|
204
|
+
yl_neg = (yl.i[HI] & 0x80000000); \
|
205
|
+
yh.l = yh.l & ULL(7fffffffffffffff); /* compute the absolute value*/\
|
206
|
+
yl.l = yl.l & ULL(7fffffffffffffff); /* compute the absolute value*/\
|
207
|
+
u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
|
208
|
+
if(yl.d > __eps__ * u53.d){ \
|
209
|
+
if(yl_neg!=yh_neg) { \
|
210
|
+
yh.d = __yh__; \
|
211
|
+
yh.l--; /* Beware: fails for zero */ \
|
212
|
+
return yh.d ; \
|
213
|
+
} \
|
214
|
+
else return __yh__; \
|
215
|
+
} \
|
216
|
+
}
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
#define TEST_AND_COPY_RU(__cond__, __res__, __yh__, __yl__, __eps__) \
|
221
|
+
{ \
|
222
|
+
db_number yh, yl, u53; int yh_neg, yl_neg; \
|
223
|
+
yh.d = __yh__; yl.d = __yl__; \
|
224
|
+
yh_neg = (yh.i[HI] & 0x80000000); \
|
225
|
+
yl_neg = (yl.i[HI] & 0x80000000); \
|
226
|
+
yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
227
|
+
yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
228
|
+
u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
|
229
|
+
__cond__ = 0; \
|
230
|
+
if(yl.d > __eps__ * u53.d){ \
|
231
|
+
__cond__ = 1; \
|
232
|
+
if(!yl_neg) { /* The case yl==0 is filtered by the above test*/ \
|
233
|
+
/* return next up */ \
|
234
|
+
yh.d = __yh__; \
|
235
|
+
if(yh_neg) yh.l--; else yh.l++; /* Beware: fails for zero */ \
|
236
|
+
__res__ = yh.d ; \
|
237
|
+
} \
|
238
|
+
else { \
|
239
|
+
__res__ = __yh__; \
|
240
|
+
} \
|
241
|
+
} \
|
242
|
+
}
|
243
|
+
|
244
|
+
#define TEST_AND_COPY_RD(__cond__, __res__, __yh__, __yl__, __eps__) \
|
245
|
+
{ \
|
246
|
+
db_number yh, yl, u53; int yh_neg, yl_neg; \
|
247
|
+
yh.d = __yh__; yl.d = __yl__; \
|
248
|
+
yh_neg = (yh.i[HI] & 0x80000000); \
|
249
|
+
yl_neg = (yl.i[HI] & 0x80000000); \
|
250
|
+
yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
251
|
+
yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
|
252
|
+
u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
|
253
|
+
__cond__ = 0; \
|
254
|
+
if(yl.d > __eps__ * u53.d){ \
|
255
|
+
__cond__ = 1; \
|
256
|
+
if(yl_neg) { /* The case yl==0 is filtered by the above test*/ \
|
257
|
+
/* return next down */ \
|
258
|
+
yh.d = __yh__; \
|
259
|
+
if(yh_neg) yh.l++; else yh.l--; /* Beware: fails for zero */ \
|
260
|
+
__res__ = yh.d ; \
|
261
|
+
} \
|
262
|
+
else { \
|
263
|
+
__res__ = __yh__; \
|
264
|
+
} \
|
265
|
+
} \
|
266
|
+
}
|
267
|
+
|
268
|
+
|
269
|
+
/*
 * TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__)
 *
 * Rounding test for round-toward-zero on the double-double __yh__ + __yl__.
 * Unlike TEST_AND_RETURN_RZ it does not return: it stores the outcome.
 *
 *   __cond__  set to 1 when the test succeeds, 0 otherwise
 *   __res__   written only when the test succeeds
 *   __yh__    high part of the approximation
 *   __yl__    low part of the approximation
 *   __eps__   error bound, compared after scaling by u53
 *
 * u53 is the power of two obtained by adding one to the biased exponent of
 * |yh|. The test succeeds when |yl| > __eps__ * u53. In that case:
 *   - if yl and yh have opposite signs, __res__ is yh with its 64-bit
 *     pattern decremented (the neighbouring double toward zero);
 *   - otherwise __res__ is yh itself.
 * As with the other rounding macros, the decrement fails for yh == 0.
 *
 * The locals are named yh, yl, u53, yh_neg, yl_neg, so callers must not pass
 * expressions that use those names.
 */
#define TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__)      \
{                                                                         \
  db_number yh, yl, u53;  int yh_neg, yl_neg;                             \
  yh.d = __yh__;    yl.d = __yl__;                                        \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l = yh.l & ULL(7fffffffffffffff);  /* compute the absolute value*/   \
  yl.l = yl.l & ULL(7fffffffffffffff);  /* compute the absolute value*/   \
  u53.l = (yh.l & ULL(7ff0000000000000)) +  ULL(0010000000000000);        \
  __cond__ = 0;                                                           \
  if(yl.d > (__eps__) * u53.d){                                           \
    __cond__ = 1;                                                         \
    if(yl_neg!=yh_neg) {                                                  \
      yh.d = __yh__;                                                      \
      yh.l--;                 /* Beware: fails for zero */                \
      __res__ = yh.d ;                                                    \
    }                                                                     \
    else {                                                                \
      __res__ = __yh__;                                                   \
    }                                                                     \
  }                                                                       \
}
|
291
|
+
|
292
|
+
|
293
|
+
|
294
|
+
/* If the processor has a FMA, use it ! **/
|
295
|
+
|
296
|
+
/* All this probably works only with gcc.
|
297
|
+
See Markstein book for the case of HP's compiler */
|
298
|
+
|
299
|
+
#if defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__)
|
300
|
+
#define PROCESSOR_HAS_FMA 1
|
301
|
+
#undef PROCESSOR_HAS_FMA
|
302
|
+
#define FMA(a,b,c) /* r = a*b + c*/ \
|
303
|
+
({ \
|
304
|
+
double _a, _b,_c,_r; \
|
305
|
+
_a=a; _b=b;_c=c; \
|
306
|
+
__asm__ ("fmadd %0, %1, %2, %3\n ;;\n" \
|
307
|
+
: "=f"(_r) \
|
308
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
309
|
+
); \
|
310
|
+
_r; \
|
311
|
+
})
|
312
|
+
|
313
|
+
|
314
|
+
#define FMS(a,b,c) /* r = a*b - c*/ \
|
315
|
+
({ \
|
316
|
+
double _a, _b,_c,_r; \
|
317
|
+
_a=a; _b=b;_c=c; \
|
318
|
+
__asm__ ("fmsub %0, %1, %2, %3\n ;;\n" \
|
319
|
+
: "=f"(_r) \
|
320
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
321
|
+
); \
|
322
|
+
_r; \
|
323
|
+
})
|
324
|
+
|
325
|
+
#endif /* defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__) */
|
326
|
+
|
327
|
+
|
328
|
+
|
329
|
+
|
330
|
+
/* On the Itanium 1 / gcc3.2 we lose 10 cycles when using the FMA !?!
|
331
|
+
It probably breaks the scheduling algorithms somehow...
|
332
|
+
To test again with higher gcc versions
|
333
|
+
*/
|
334
|
+
|
335
|
+
#if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) && 0
|
336
|
+
#define PROCESSOR_HAS_FMA 1
|
337
|
+
#define FMA(a,b,c) /* r = a*b + c*/ \
|
338
|
+
({ \
|
339
|
+
double _a, _b,_c,_r; \
|
340
|
+
_a=a; _b=b;_c=c; \
|
341
|
+
__asm__ ("fma %0 = %1, %2, %3\n ;;\n" \
|
342
|
+
: "=f"(_r) \
|
343
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
344
|
+
); \
|
345
|
+
_r; \
|
346
|
+
})
|
347
|
+
|
348
|
+
|
349
|
+
#define FMS(a,b,c) /* r = a*b - c*/ \
|
350
|
+
({ \
|
351
|
+
double _a, _b, _c, _r; \
|
352
|
+
_a=a; _b=b;_c=c; \
|
353
|
+
__asm__ ("fms %0 = %1, %2, %3\n ;;\n" \
|
354
|
+
: "=f"(_r) \
|
355
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
356
|
+
); \
|
357
|
+
_r; \
|
358
|
+
})
|
359
|
+
#endif /* defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) && 0 */
|
360
|
+
|
361
|
+
|
362
|
+
|
363
|
+
|
364
|
+
#if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)
|
365
|
+
#define PROCESSOR_HAS_FMA 1
|
366
|
+
#if 0 /* Commented out because it shouldn't be there: There should be
|
367
|
+
a standard #include doing all this, but as of april 2005
|
368
|
+
it doesn't exist, say intel people). Leave
|
369
|
+
it as documentation, though, until it is replaced by #include
|
370
|
+
*/
|
371
|
+
/* Table 1-17: legal floating-point precision completers (.pc) */
|
372
|
+
typedef enum {
|
373
|
+
_PC_S = 1 /* single .s */
|
374
|
+
,_PC_D = 2 /* double .d */
|
375
|
+
,_PC_NONE = 3 /* dynamic */
|
376
|
+
} _Asm_pc;
|
377
|
+
|
378
|
+
/* Table 1-22: legal getf/setf floating-point register access completers */
|
379
|
+
typedef enum {
|
380
|
+
_FR_S = 1 /* single form .s */
|
381
|
+
,_FR_D = 2 /* double form .d */
|
382
|
+
,_FR_EXP = 3 /* exponent form .exp */
|
383
|
+
,_FR_SIG = 4 /* significand form .sig */
|
384
|
+
} _Asm_fr_access;
|
385
|
+
|
386
|
+
/* Table 1-24: legal floating-point FPSR status field completers (.sf) */
|
387
|
+
typedef enum {
|
388
|
+
_SF0 = 0 /* FPSR status field 0 .s0 */
|
389
|
+
,_SF1 = 1 /* FPSR status field 1 .s1 */
|
390
|
+
,_SF2 = 2 /* FPSR status field 2 .s2 */
|
391
|
+
,_SF3 = 3 /* FPSR status field 3 .s3 */
|
392
|
+
} _Asm_sf;
|
393
|
+
#endif
|
394
|
+
|
395
|
+
#define FMA(a,b,c) /* r = a*b + c*/ \
|
396
|
+
_Asm_fma( 2/*_PC_D*/, a, b, c, 0/*_SF0*/ );
|
397
|
+
|
398
|
+
|
399
|
+
#define FMS(a,b,c) /* r = a*b - c*/ \
|
400
|
+
_Asm_fms( 2/*_PC_D*/, a, b, c, 0/*_SF0*/);
|
401
|
+
|
402
|
+
#endif /*defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)*/
|
403
|
+
|
404
|
+
|
405
|
+
|
406
|
+
|
407
|
+
|
408
|
+
|
409
|
+
|
410
|
+
|
411
|
+
#ifdef WORDS_BIGENDIAN
|
412
|
+
#define DB_ONE {{0x3ff00000, 0x00000000}}
|
413
|
+
#else
|
414
|
+
#define DB_ONE {{0x00000000 ,0x3ff00000}}
|
415
|
+
#endif
|
416
|
+
|
417
|
+
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
|
422
|
+
extern const scs scs_zer, scs_half, scs_one, scs_two, scs_sixinv;
|
423
|
+
|
424
|
+
|
425
|
+
#define SCS_ZERO (scs_ptr)(&scs_zer)
|
426
|
+
#define SCS_HALF (scs_ptr)(&scs_half)
|
427
|
+
#define SCS_ONE (scs_ptr)(&scs_one)
|
428
|
+
#define SCS_TWO (scs_ptr)(&scs_two)
|
429
|
+
#define SCS_SIXINV (scs_ptr)(&scs_sixinv)
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
|
434
|
+
|
435
|
+
#define ABS(x) (((x)>0) ? (x) : (-(x)))
|
436
|
+
|
437
|
+
|
438
|
+
|
439
|
+
|
440
|
+
|
441
|
+
/*
|
442
|
+
* In the following, when an operator is preceded by a '@' it means that we
|
443
|
+
* are considering the IEEE-compliant machine operator, otherwise it
|
444
|
+
* is the mathematical operator.
|
445
|
+
*
|
446
|
+
*/
|
447
|
+
|
448
|
+
|
449
|
+
/*
 * Add12Cond(s, r, a, b)
 * computes s and r such that s + r = a + b, with s = a @+ b exactly.
 *
 * No ordering of the operands is required: the magnitudes are compared at
 * run time and the correction term is computed from the larger one.
 * a and b are copied into locals once; the comparison and the correction
 * use only those copies, so the arguments are evaluated a single time and
 * s may alias a or b.
 */
#define Add12Cond(s, r, a, b)        \
        {double _z, _a=(a), _b=(b);  \
         s = _a + _b;                \
         if (ABS(_a) > ABS(_b)){     \
           _z = s - _a;              \
           r = _b - _z;              \
         }else {                     \
           _z = s - _b;              \
           r = _a - _z;}}
|
461
|
+
|
462
|
+
/*
|
463
|
+
* computes s and r such that s + r = a + b, with s = a @+ b exactly
|
464
|
+
* under the condition |a| >= |b|
|
465
|
+
*/
|
466
|
+
#define Add12(s, r, a, b) \
|
467
|
+
{double _z, _a=a, _b=b; \
|
468
|
+
s = _a + _b; \
|
469
|
+
_z = s - _a; \
|
470
|
+
r = _b - _z; }
|
471
|
+
|
472
|
+
|
473
|
+
/*
 * Fast3Sum(r1, r2, r3, a, b, c)
 * computes r1, r2, r3 such that r1 + r2 + r3 = a + b + c.
 *
 * It is built from three Add12 steps: (b, c), then (a, partial sum), then
 * the two error terms. Add12 is the two-sum primitive defined in this
 * header; the name Fast2Sum used here previously has no definition in this
 * header.
 * NOTE(review): each Add12 step is only exact when its first operand
 * dominates the second in magnitude, so the "exactly" claim presumably
 * needs |a| >= |b| >= |c| -- confirm against the callers.
 */
#define Fast3Sum(r1, r2, r3, a, b,  c)   \
        {double _u3, _v3, _w3;           \
         Add12(_u3, _v3, b, c);          \
         Add12(r1, _w3, a, _u3);         \
         Add12(r2, r3, _w3, _v3); }
|
481
|
+
|
482
|
+
|
483
|
+
|
484
|
+
|
485
|
+
|
486
|
+
|
487
|
+
|
488
|
+
/*
|
489
|
+
* Functions to compute double-double addition: zh+zl = xh+xl + yh+yl
|
490
|
+
* knowing that |xh| >= |yh| (Add22Cond tests the magnitudes itself and needs no such condition)
|
491
|
+
* relative error is smaller than 2^-103
|
492
|
+
*/
|
493
|
+
|
494
|
+
|
495
|
+
#if ADD22_AS_FUNCTIONS
|
496
|
+
extern void Add22(double *zh, double *zl, double xh, double xl, double yh, double yl);
|
497
|
+
extern void Add22Cond(double *zh, double *zl, double xh, double xl, double yh, double yl);
|
498
|
+
|
499
|
+
#else /* ADD22_AS_FUNCTIONS */
|
500
|
+
|
501
|
+
#define Add22Cond(zh,zl,xh,xl,yh,yl) \
|
502
|
+
do { \
|
503
|
+
double _r,_s; \
|
504
|
+
_r = (xh)+(yh); \
|
505
|
+
_s = ((ABS(xh)) > (ABS(yh)))? ((xh)-_r+(yh)+(yl)+(xl)) : ((yh)-_r+(xh)+(xl)+(yl)); \
|
506
|
+
*zh = _r+_s; \
|
507
|
+
*zl = (_r - (*zh)) + _s; \
|
508
|
+
} while(2+2==5)
|
509
|
+
|
510
|
+
|
511
|
+
|
512
|
+
#define Add22(zh,zl,xh,xl,yh,yl) \
|
513
|
+
do { \
|
514
|
+
double _r,_s; \
|
515
|
+
_r = (xh)+(yh); \
|
516
|
+
_s = ((((xh)-_r) +(yh)) + (yl)) + (xl); \
|
517
|
+
*zh = _r+_s; \
|
518
|
+
*zl = (_r - (*zh)) + _s; \
|
519
|
+
} while(0)
|
520
|
+
|
521
|
+
#endif /* ADD22_AS_FUNCTIONS */
|
522
|
+
|
523
|
+
|
524
|
+
|
525
|
+
#ifdef PROCESSOR_HAS_FMA
|
526
|
+
/* One of the nice things with the fused multiply-and-add is that it
|
527
|
+
greatly simplifies the double-double multiplications : */
|
528
|
+
#define Mul12(rh,rl,u,v) \
|
529
|
+
{ \
|
530
|
+
*rh = u*v; \
|
531
|
+
*rl = FMS(u,v, *rh); \
|
532
|
+
}
|
533
|
+
|
534
|
+
#define Mul22(pzh,pzl, xh,xl, yh,yl) \
|
535
|
+
{ \
|
536
|
+
double ph, pl; \
|
537
|
+
ph = xh*yh; \
|
538
|
+
pl = FMS(xh, yh, ph); \
|
539
|
+
pl = FMA(xh,yl, pl); \
|
540
|
+
pl = FMA(xl,yh,pl); \
|
541
|
+
*pzh = ph+pl; \
|
542
|
+
*pzl = ph - (*pzh); \
|
543
|
+
*pzl += pl; \
|
544
|
+
}
|
545
|
+
|
546
|
+
|
547
|
+
/* besides we don't care anymore about overflows in the mult */
|
548
|
+
#define Mul12Cond Mul12
|
549
|
+
#define Mul22cond Mul22
|
550
|
+
|
551
|
+
|
552
|
+
#else /* ! PROCESSOR_HAS_FMA */
|
553
|
+
|
554
|
+
|
555
|
+
#if DEKKER_AS_FUNCTIONS
|
556
|
+
extern void Mul12(double *rh, double *rl, double u, double v);
|
557
|
+
extern void Mul12Cond(double *rh, double *rl, double a, double b);
|
558
|
+
extern void Mul22(double *zh, double *zl, double xh, double xl, double yh, double yl);
|
559
|
+
#else /* if DEKKER_AS_FUNCTIONS */
|
560
|
+
/*
|
561
|
+
* computes rh and rl such that rh + rl = a * b with rh = a @* b exactly
|
562
|
+
* under the conditions : |a| < 2^970 and |b| < 2^970
|
563
|
+
*/
|
564
|
+
#define Mul12(rh,rl,u,v) \
|
565
|
+
{ \
|
566
|
+
const double c = 134217729.; /* 2^27 +1 */ \
|
567
|
+
double up, u1, u2, vp, v1, v2; \
|
568
|
+
double _u =u, _v=v; \
|
569
|
+
\
|
570
|
+
up = _u*c; vp = _v*c; \
|
571
|
+
u1 = (_u-up)+up; v1 = (_v-vp)+vp; \
|
572
|
+
u2 = _u-u1; v2 = _v-v1; \
|
573
|
+
\
|
574
|
+
*rh = _u*_v; \
|
575
|
+
*rl = (((u1*v1-*rh)+(u1*v2))+(u2*v1))+(u2*v2);\
|
576
|
+
}
|
577
|
+
|
578
|
+
|
579
|
+
/*
 * Mul12Cond(rh, rl, a, b)
 * Computes *rh and *rl such that *rh + *rl = a * b and *rh = a @* b exactly.
 *
 * rh and rl are pointers to double, as for Mul12.
 * Unlike Mul12, large operands are allowed. An operand whose high word,
 * sign bit masked off, exceeds 0x7C900000 (magnitude above about 2^970) is
 * scaled by 2^-53 before the Dekker product, and the result is scaled back
 * by 2^53 afterwards, once per scaled operand.
 *
 * The sign bit is masked so that large negative operands are rescaled too.
 * The operands are stored through the .d member of db_number and read back
 * through .d for the arithmetic.
 */
#define Mul12Cond(rh, rl, a,  b) \
{\
  const double two_em53 = 1.1102230246251565404e-16; /* 0x3CA00000, 0x00000000 */\
  const double two_e53  = 9007199254740992.;         /* 0x43400000, 0x00000000 */\
  double u, v;                                                \
  db_number _a, _b;                                           \
  int _a_big, _b_big;                                         \
                                                              \
  _a.d = (a);   _b.d = (b);                                   \
  _a_big = ((_a.i[HI] & 0x7fffffff) > 0x7C900000);            \
  _b_big = ((_b.i[HI] & 0x7fffffff) > 0x7C900000);            \
                                                              \
  if (_a_big) u = _a.d*two_em53;                              \
  else        u = _a.d;                                       \
  if (_b_big) v = _b.d*two_em53;                              \
  else        v = _b.d;                                       \
                                                              \
  Mul12(rh, rl, u, v);                                        \
                                                              \
  if (_a_big) {*rh *= two_e53; *rl *= two_e53;}               \
  if (_b_big) {*rh *= two_e53; *rl *= two_e53;}               \
}
|
599
|
+
|
600
|
+
|
601
|
+
|
602
|
+
/*
|
603
|
+
* computes double-double multiplication: zh+zl = (xh+xl) * (yh+yl)
|
604
|
+
* relative error is smaller than 2^-102
|
605
|
+
*/
|
606
|
+
|
607
|
+
|
608
|
+
|
609
|
+
#define Mul22(zh,zl,xh,xl,yh,yl) \
|
610
|
+
{ \
|
611
|
+
double mh, ml; \
|
612
|
+
\
|
613
|
+
const double c = 134217729.; \
|
614
|
+
double up, u1, u2, vp, v1, v2; \
|
615
|
+
\
|
616
|
+
up = (xh)*c; vp = (yh)*c; \
|
617
|
+
u1 = ((xh)-up)+up; v1 = ((yh)-vp)+vp; \
|
618
|
+
u2 = (xh)-u1; v2 = (yh)-v1; \
|
619
|
+
\
|
620
|
+
mh = (xh)*(yh); \
|
621
|
+
ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2); \
|
622
|
+
\
|
623
|
+
ml += (xh)*(yl) + (xl)*(yh); \
|
624
|
+
*zh = mh+ml; \
|
625
|
+
*zl = mh - (*zh) + ml; \
|
626
|
+
}
|
627
|
+
|
628
|
+
|
629
|
+
|
630
|
+
#endif /* DEKKER_AS_FUNCTIONS */
|
631
|
+
|
632
|
+
#endif /* PROCESSOR_HAS_FMA */
|
633
|
+
|
634
|
+
|
635
|
+
|
636
|
+
/* In the following the one-line computation of _cl was split so that
|
637
|
+
icc(8.1) would compile it properly. It's a bug of icc */
|
638
|
+
|
639
|
+
#if DEKKER_AS_FUNCTIONS
|
640
|
+
extern void Div22(double *z, double *zz, double x, double xx, double y, double yy);
|
641
|
+
#else
|
642
|
+
#define Div22(pzh,pzl,xh,xl,yh,yl) { \
|
643
|
+
double _ch,_cl,_uh,_ul; \
|
644
|
+
_ch=(xh)/(yh); Mul12(&_uh,&_ul,_ch,(yh)); \
|
645
|
+
_cl=((xh)-_uh); \
|
646
|
+
_cl -= _ul; \
|
647
|
+
_cl += (xl); \
|
648
|
+
_cl -= _ch*(yl); \
|
649
|
+
_cl /= (yh); \
|
650
|
+
*pzh=_ch+_cl; *pzl=(_ch-(*pzh))+_cl; \
|
651
|
+
}
|
652
|
+
#endif /* DEKKER_AS_FUNCTIONS */
|
653
|
+
|
654
|
+
|
655
|
+
|
656
|
+
|
657
|
+
|
658
|
+
#endif /*CRLIBM_PRIVATE_H*/
|