intervals 0.3.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.txt +1 -0
- data/ext/crlibm/AUTHORS +2 -0
- data/ext/crlibm/COPYING +504 -0
- data/ext/crlibm/ChangeLog +80 -0
- data/ext/crlibm/INSTALL +182 -0
- data/ext/crlibm/Makefile.am +84 -0
- data/ext/crlibm/Makefile.in +530 -0
- data/ext/crlibm/NEWS +0 -0
- data/ext/crlibm/README +31 -0
- data/ext/crlibm/TODO +47 -0
- data/ext/crlibm/VERSION +1 -0
- data/ext/crlibm/aclocal.m4 +989 -0
- data/ext/crlibm/atan-itanium.c +846 -0
- data/ext/crlibm/atan-pentium.c +261 -0
- data/ext/crlibm/atan_accurate.c +244 -0
- data/ext/crlibm/atan_accurate.h +191 -0
- data/ext/crlibm/atan_fast.c +324 -0
- data/ext/crlibm/atan_fast.h +678 -0
- data/ext/crlibm/config.guess +1461 -0
- data/ext/crlibm/config.sub +1566 -0
- data/ext/crlibm/configure +7517 -0
- data/ext/crlibm/configure.ac +364 -0
- data/ext/crlibm/crlibm.h +125 -0
- data/ext/crlibm/crlibm_config.h +149 -0
- data/ext/crlibm/crlibm_config.h.in +148 -0
- data/ext/crlibm/crlibm_private.c +293 -0
- data/ext/crlibm/crlibm_private.h +658 -0
- data/ext/crlibm/csh_fast.c +631 -0
- data/ext/crlibm/csh_fast.h +771 -0
- data/ext/crlibm/double-extended.h +496 -0
- data/ext/crlibm/exp-td.c +962 -0
- data/ext/crlibm/exp-td.h +685 -0
- data/ext/crlibm/exp_accurate.c +197 -0
- data/ext/crlibm/exp_accurate.h +85 -0
- data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
- data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
- data/ext/crlibm/gappa/log-de.gappa +81 -0
- data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
- data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
- data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
- data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
- data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
- data/ext/crlibm/gappa/log-td.gappa +156 -0
- data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
- data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
- data/ext/crlibm/install-sh +269 -0
- data/ext/crlibm/log-de.c +431 -0
- data/ext/crlibm/log-de.h +732 -0
- data/ext/crlibm/log-td.c +852 -0
- data/ext/crlibm/log-td.h +819 -0
- data/ext/crlibm/log10-td.c +906 -0
- data/ext/crlibm/log10-td.h +823 -0
- data/ext/crlibm/log2-td.c +935 -0
- data/ext/crlibm/log2-td.h +821 -0
- data/ext/crlibm/maple/atan.mpl +359 -0
- data/ext/crlibm/maple/common-procedures.mpl +997 -0
- data/ext/crlibm/maple/csh.mpl +446 -0
- data/ext/crlibm/maple/double-extended.mpl +151 -0
- data/ext/crlibm/maple/exp-td.mpl +195 -0
- data/ext/crlibm/maple/log-de.mpl +243 -0
- data/ext/crlibm/maple/log-td.mpl +316 -0
- data/ext/crlibm/maple/log10-td.mpl +345 -0
- data/ext/crlibm/maple/log2-td.mpl +334 -0
- data/ext/crlibm/maple/trigo.mpl +728 -0
- data/ext/crlibm/maple/triple-double.mpl +58 -0
- data/ext/crlibm/missing +198 -0
- data/ext/crlibm/mkinstalldirs +40 -0
- data/ext/crlibm/rem_pio2_accurate.c +219 -0
- data/ext/crlibm/rem_pio2_accurate.h +53 -0
- data/ext/crlibm/scs_lib/AUTHORS +3 -0
- data/ext/crlibm/scs_lib/COPYING +504 -0
- data/ext/crlibm/scs_lib/ChangeLog +16 -0
- data/ext/crlibm/scs_lib/INSTALL +215 -0
- data/ext/crlibm/scs_lib/Makefile.am +18 -0
- data/ext/crlibm/scs_lib/Makefile.in +328 -0
- data/ext/crlibm/scs_lib/NEWS +0 -0
- data/ext/crlibm/scs_lib/README +9 -0
- data/ext/crlibm/scs_lib/TODO +4 -0
- data/ext/crlibm/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm/scs_lib/config.guess +1461 -0
- data/ext/crlibm/scs_lib/config.sub +1566 -0
- data/ext/crlibm/scs_lib/configure +6226 -0
- data/ext/crlibm/scs_lib/division_scs.c +110 -0
- data/ext/crlibm/scs_lib/double2scs.c +174 -0
- data/ext/crlibm/scs_lib/install-sh +269 -0
- data/ext/crlibm/scs_lib/missing +198 -0
- data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
- data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
- data/ext/crlibm/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm/scs_lib/print_scs.c +73 -0
- data/ext/crlibm/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm/scs_lib/scs.h +353 -0
- data/ext/crlibm/scs_lib/scs2double.c +391 -0
- data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm/scs_lib/scs_private.c +23 -0
- data/ext/crlibm/scs_lib/scs_private.h +133 -0
- data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
- data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm/stamp-h.in +1 -0
- data/ext/crlibm/tests/Makefile.am +43 -0
- data/ext/crlibm/tests/Makefile.in +396 -0
- data/ext/crlibm/tests/blind_test.c +148 -0
- data/ext/crlibm/tests/generate_test_vectors.c +258 -0
- data/ext/crlibm/tests/soak_test.c +334 -0
- data/ext/crlibm/tests/test_common.c +627 -0
- data/ext/crlibm/tests/test_common.h +28 -0
- data/ext/crlibm/tests/test_perf.c +570 -0
- data/ext/crlibm/tests/test_val.c +249 -0
- data/ext/crlibm/trigo_accurate.c +500 -0
- data/ext/crlibm/trigo_accurate.h +331 -0
- data/ext/crlibm/trigo_fast.c +1219 -0
- data/ext/crlibm/trigo_fast.h +639 -0
- data/ext/crlibm/triple-double.h +878 -0
- data/ext/extconf.rb +31 -0
- data/ext/fpu.c +107 -0
- data/ext/jamis-mod.rb +591 -0
- data/lib/fpu.rb +287 -0
- data/lib/interval.rb +1170 -0
- data/lib/intervals.rb +212 -0
- data/lib/struct_float.rb +133 -0
- data/test/data_atan.txt +360 -0
- data/test/data_cos.txt +346 -0
- data/test/data_cosh.txt +3322 -0
- data/test/data_exp.txt +3322 -0
- data/test/data_log.txt +141 -0
- data/test/data_sin.txt +140 -0
- data/test/data_sinh.txt +3322 -0
- data/test/data_tan.txt +342 -0
- metadata +186 -0
@@ -0,0 +1,658 @@
|
|
1
|
+
/*
|
2
|
+
* crlibm_private.h
|
3
|
+
*
|
4
|
+
* This file contains useful tools and data for the crlibm functions.
|
5
|
+
*
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef CRLIBM_PRIVATE_H
|
9
|
+
#define CRLIBM_PRIVATE_H 1
|
10
|
+
|
11
|
+
#include "scs_lib/scs.h"
|
12
|
+
#include "scs_lib/scs_private.h"
|
13
|
+
|
14
|
+
#ifdef HAVE_CONFIG_H
|
15
|
+
#include "crlibm_config.h"
|
16
|
+
#endif
|
17
|
+
/* otherwise CMake is used, and defines all the useful variables using -D switch */
|
18
|
+
|
19
|
+
#ifdef HAVE_INTTYPES_H
|
20
|
+
#include <inttypes.h>
|
21
|
+
#endif
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
#if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
|
26
|
+
# ifndef CRLIBM_TYPEOS_BSD
|
27
|
+
# include <fpu_control.h>
|
28
|
+
# ifndef _FPU_SETCW
|
29
|
+
# define _FPU_SETCW(cw) __asm__ ("fldcw %0" : : "m" (*&cw))
|
30
|
+
# endif
|
31
|
+
# ifndef _FPU_GETCW
|
32
|
+
# define _FPU_GETCW(cw) __asm__ ("fnstcw %0" : "=m" (*&cw))
|
33
|
+
# endif
|
34
|
+
# endif
|
35
|
+
#endif
|
36
|
+
|
37
|
+
/* 64 bit arithmetic may be standardised, but people still do what they want */
|
38
|
+
#ifdef HAVE_INTTYPES_H
|
39
|
+
#define ULL(bits) 0x##bits##uLL
|
40
|
+
#elif defined(_WIN32)
|
41
|
+
/* Windows garbage there */
|
42
|
+
typedef long long int int64_t;
|
43
|
+
typedef unsigned long long int uint64_t;
|
44
|
+
#define ULL(bits) 0x##bits##i64
|
45
|
+
/* Default, hoping it works, hopefully less and less relevant */
|
46
|
+
#else
|
47
|
+
typedef long long int int64_t;
|
48
|
+
typedef unsigned long long int uint64_t;
|
49
|
+
#define ULL(bits) 0x##bits##uLL
|
50
|
+
#endif
|
51
|
+
|
52
|
+
#ifndef SCS_DEF_INT64
|
53
|
+
#define SCS_DEF_INT64
|
54
|
+
#ifdef CRLIBM_TYPEOS_HPUX
|
55
|
+
#ifndef __LP64__ /* To solve the problem with 64 bits integer on HPPA */
|
56
|
+
typedef long long int64_t;
|
57
|
+
typedef unsigned long long uint64_t;
|
58
|
+
#define ULL(bits) 0x##bits##uLL
|
59
|
+
#endif
|
60
|
+
#endif
|
61
|
+
#endif
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
/* The Add22 and Add22Cond functions, as well as double-double
|
67
|
+
multiplications of the Dekker family may be either defined as
|
68
|
+
functions, or as #defines. Which one is better depends on the
|
69
|
+
processor/compiler/OS. As #define has to be used with more care (not
|
70
|
+
type-safe), the two following variables should be set to 1 in the
|
71
|
+
development/debugging phase, until no type warning remains.
|
72
|
+
|
73
|
+
*/
|
74
|
+
|
75
|
+
#define ADD22_AS_FUNCTIONS 0
|
76
|
+
#define DEKKER_AS_FUNCTIONS 0
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
/* setting the following variable adds variables and code for
|
81
|
+
monitoring the performance.
|
82
|
+
Note that sometimes only round to nearest is instrumented */
|
83
|
+
#define EVAL_PERF 1
|
84
|
+
|
85
|
+
|
86
|
+
#if EVAL_PERF==1
|
87
|
+
/* counter of calls to the second step (accurate step) */
|
88
|
+
extern int crlibm_second_step_taken;
|
89
|
+
#endif
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
/* The prototypes of the second steps */
|
94
|
+
extern void exp_SC(scs_ptr res_scs, double x);
|
95
|
+
extern double scs_exp_rn(double);
|
96
|
+
extern double scs_exp_ru(double);
|
97
|
+
extern double scs_exp_rd(double);
|
98
|
+
|
99
|
+
extern void scs_log(scs_ptr,db_number, int);
|
100
|
+
|
101
|
+
extern double scs_atan_rn(double);
|
102
|
+
extern double scs_atan_rd(double);
|
103
|
+
extern double scs_atan_ru(double);
|
104
|
+
|
105
|
+
extern double scs_sin_rn(double);
|
106
|
+
extern double scs_sin_ru(double);
|
107
|
+
extern double scs_sin_rd(double);
|
108
|
+
extern double scs_sin_rz(double);
|
109
|
+
extern double scs_cos_rn(double);
|
110
|
+
extern double scs_cos_ru(double);
|
111
|
+
extern double scs_cos_rd(double);
|
112
|
+
extern double scs_cos_rz(double);
|
113
|
+
extern double scs_tan_rn(double);
|
114
|
+
extern double scs_tan_rd(double);
|
115
|
+
extern double scs_tan_ru(double);
|
116
|
+
extern double scs_tan_rz(double);
|
117
|
+
|
118
|
+
extern int rem_pio2_scs(scs_ptr, scs_ptr);
|
119
|
+
|
120
|
+
/*
 * DOUBLE2INT(_i, _d): store in _i the integer obtained by rounding the
 * double _d.
 *
 * The constant added is 2^52 + 2^51 (6755399441055744.0): after the
 * addition the integer part of _d sits in the low mantissa bits, and the
 * LO word of the db_number image of the sum is copied to _i.  The rounding
 * is performed by the floating-point addition itself (round to nearest is
 * the intended mode).
 *
 *   _i : integer lvalue receiving the result
 *   _d : double expression, evaluated once; it is parenthesized here so
 *        that low-precedence arguments (e.g. a conditional expression)
 *        are added as a whole
 *
 * NOTE(review): the result is only meaningful when the rounded value fits
 * in the LO word of db_number -- confirm the range guaranteed by callers.
 */
#define DOUBLE2INT(_i, _d)                     \
 {db_number _t;                                \
  _t.d = ((_d)+6755399441055744.0);            \
  (_i) = _t.i[LO];}
|
128
|
+
|
129
|
+
|
130
|
+
/*
 * DOUBLE2LONGINT(_i, _d): same idea as DOUBLE2INT, for a 64-bit result.
 * Beware: works only for |_i| < 2^51 - 1.
 *
 * After adding 2^52 + 2^51 the 52-bit mantissa field holds 2^51 + n, where
 * n is the rounded integer.  Bit 51 of the field is therefore set exactly
 * when n >= 0; otherwise the low 51 bits are sign-extended to 64 bits.
 *
 * The sign is taken from the rounded sum and not from _d itself: an input
 * in [-0.5, 0) rounds to 0 and must yield 0, not the sign-extended pattern
 * 0xFFF8000000000000.
 *
 *   _i : 64-bit integer lvalue receiving the result
 *   _d : double expression, evaluated once
 */
#define DOUBLE2LONGINT(_i, _d)                                              \
 {                                                                          \
  db_number _t;                                                             \
  _t.d = ((_d)+6755399441055744.0);                                         \
  if (_t.l & ULL(0008000000000000))   /* rounded value is >= 0 */           \
    (_i) = _t.l & ULL(0007FFFFFFFFFFFF);                                    \
  else                                /* negative: sign extend */           \
    (_i) = (_t.l & ULL(0007FFFFFFFFFFFF)) | (ULL(FFF8000000000000));        \
 }
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
/* Macros for the rounding tests in directed modes */
|
146
|
+
/* After Evgeny Gvozdev pointed out a bug in the rounding procedures I
|
147
|
+
decided to centralize them here
|
148
|
+
|
149
|
+
Note that these tests launch the accurate phase when yl=0, in
|
150
|
+
particular in the exceptional cases when the image of a double is a
|
151
|
+
double. See the chapter about the log for an example
|
152
|
+
|
153
|
+
*/
|
154
|
+
|
155
|
+
|
156
|
+
/*
 * TEST_AND_RETURN_RU(__yh__, __yl__, __eps__): rounding test for the
 * round-up (towards +infinity) mode.
 *
 * u53 is the power of two whose exponent field is one above that of
 * |__yh__|.  When |__yl__| > __eps__ * u53 the enclosing function returns:
 * __yh__ itself if the sign bit of __yl__ is set, otherwise the neighbour
 * of __yh__ towards +infinity.  When the test fails the macro falls through
 * without returning (this includes __yl__ == 0).
 *
 * __yh__ may be evaluated twice.  The one-ulp step is done on the integer
 * image of __yh__ and fails for zero.
 */
#define TEST_AND_RETURN_RU(__yh__, __yl__, __eps__)                       \
{                                                                         \
  db_number yh, yl, u53;                                                  \
  int yh_neg, yl_neg;                                                     \
  yh.d = __yh__;                                                          \
  yl.d = __yl__;                                                          \
  /* remember both signs, then keep only the magnitudes */                \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l &= 0x7fffffffffffffffLL;                                           \
  yl.l &= 0x7fffffffffffffffLL;                                           \
  u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);         \
  if (yl.d > __eps__ * u53.d) {                                           \
    if (yl_neg)                                                           \
      return __yh__;                                                      \
    /* yl > 0: return next up */                                          \
    yh.d = __yh__;                                                        \
    if (yh_neg) yh.l--; else yh.l++;   /* Beware: fails for zero */       \
    return yh.d;                                                          \
  }                                                                       \
}
|
175
|
+
|
176
|
+
|
177
|
+
/*
 * TEST_AND_RETURN_RD(__yh__, __yl__, __eps__): rounding test for the
 * round-down (towards -infinity) mode.
 *
 * u53 is the power of two whose exponent field is one above that of
 * |__yh__|.  When |__yl__| > __eps__ * u53 the enclosing function returns:
 * the neighbour of __yh__ towards -infinity if the sign bit of __yl__ is
 * set, otherwise __yh__ itself.  When the test fails the macro falls
 * through without returning (this includes __yl__ == 0).
 *
 * __yh__ may be evaluated twice.  The one-ulp step is done on the integer
 * image of __yh__ and fails for zero.
 */
#define TEST_AND_RETURN_RD(__yh__, __yl__, __eps__)                       \
{                                                                         \
  db_number yh, yl, u53;                                                  \
  int yh_neg, yl_neg;                                                     \
  yh.d = __yh__;                                                          \
  yl.d = __yl__;                                                          \
  /* remember both signs, then keep only the magnitudes */                \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l &= 0x7fffffffffffffffLL;                                           \
  yl.l &= 0x7fffffffffffffffLL;                                           \
  u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);         \
  if (yl.d > __eps__ * u53.d) {                                           \
    if (!yl_neg)                                                          \
      return __yh__;                                                      \
    /* yl < 0: return next down */                                        \
    yh.d = __yh__;                                                        \
    if (yh_neg) yh.l++; else yh.l--;   /* Beware: fails for zero */       \
    return yh.d;                                                          \
  }                                                                       \
}
|
196
|
+
|
197
|
+
|
198
|
+
|
199
|
+
/*
 * TEST_AND_RETURN_RZ(__yh__, __yl__, __eps__): rounding test for the
 * round-towards-zero mode.
 *
 * u53 is the power of two whose exponent field is one above that of
 * |__yh__|.  When |__yl__| > __eps__ * u53 the enclosing function returns:
 * the neighbour of __yh__ towards zero if __yl__ and __yh__ have different
 * sign bits, otherwise __yh__ itself.  When the test fails the macro falls
 * through without returning.
 *
 * __yh__ may be evaluated twice.  Decrementing the integer image shrinks
 * the magnitude whatever the sign; it fails for zero.
 */
#define TEST_AND_RETURN_RZ(__yh__, __yl__, __eps__)                       \
{                                                                         \
  db_number yh, yl, u53;                                                  \
  int yh_neg, yl_neg;                                                     \
  yh.d = __yh__;                                                          \
  yl.d = __yl__;                                                          \
  /* remember both signs, then keep only the magnitudes */                \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l &= ULL(7fffffffffffffff);                                          \
  yl.l &= ULL(7fffffffffffffff);                                          \
  u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);         \
  if (yl.d > __eps__ * u53.d) {                                           \
    if (yl_neg == yh_neg)                                                 \
      return __yh__;                                                      \
    yh.d = __yh__;                                                        \
    yh.l--;                            /* Beware: fails for zero */       \
    return yh.d;                                                          \
  }                                                                       \
}
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
/*
 * TEST_AND_COPY_RU(__cond__, __res__, __yh__, __yl__, __eps__): same test
 * as TEST_AND_RETURN_RU, but the outcome is stored instead of returned.
 *
 *   __cond__ : lvalue set to 1 when |__yl__| > __eps__ * u53, else to 0
 *   __res__  : lvalue written only when __cond__ becomes 1; it receives
 *              __yh__ if the sign bit of __yl__ is set, otherwise the
 *              neighbour of __yh__ towards +infinity
 *
 * u53 is the power of two whose exponent field is one above that of
 * |__yh__|.  __yh__ may be evaluated twice; the one-ulp step fails for zero.
 */
#define TEST_AND_COPY_RU(__cond__, __res__, __yh__, __yl__, __eps__)      \
{                                                                         \
  db_number yh, yl, u53;                                                  \
  int yh_neg, yl_neg;                                                     \
  yh.d = __yh__;                                                          \
  yl.d = __yl__;                                                          \
  /* remember both signs, then keep only the magnitudes */                \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l &= 0x7fffffffffffffffLL;                                           \
  yl.l &= 0x7fffffffffffffffLL;                                           \
  u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);         \
  __cond__ = 0;                                                           \
  if (yl.d > __eps__ * u53.d) {                                           \
    __cond__ = 1;                                                         \
    if (yl_neg) {                                                         \
      __res__ = __yh__;                                                   \
    }                                                                     \
    else {                                                                \
      /* yl > 0: next up */                                               \
      yh.d = __yh__;                                                      \
      if (yh_neg) yh.l--; else yh.l++; /* Beware: fails for zero */       \
      __res__ = yh.d;                                                     \
    }                                                                     \
  }                                                                       \
}
|
243
|
+
|
244
|
+
/*
 * TEST_AND_COPY_RD(__cond__, __res__, __yh__, __yl__, __eps__): same test
 * as TEST_AND_RETURN_RD, but the outcome is stored instead of returned.
 *
 *   __cond__ : lvalue set to 1 when |__yl__| > __eps__ * u53, else to 0
 *   __res__  : lvalue written only when __cond__ becomes 1; it receives
 *              the neighbour of __yh__ towards -infinity if the sign bit
 *              of __yl__ is set, otherwise __yh__
 *
 * u53 is the power of two whose exponent field is one above that of
 * |__yh__|.  __yh__ may be evaluated twice; the one-ulp step fails for zero.
 */
#define TEST_AND_COPY_RD(__cond__, __res__, __yh__, __yl__, __eps__)      \
{                                                                         \
  db_number yh, yl, u53;                                                  \
  int yh_neg, yl_neg;                                                     \
  yh.d = __yh__;                                                          \
  yl.d = __yl__;                                                          \
  /* remember both signs, then keep only the magnitudes */                \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l &= 0x7fffffffffffffffLL;                                           \
  yl.l &= 0x7fffffffffffffffLL;                                           \
  u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);         \
  __cond__ = 0;                                                           \
  if (yl.d > __eps__ * u53.d) {                                           \
    __cond__ = 1;                                                         \
    if (!yl_neg) {                                                        \
      __res__ = __yh__;                                                   \
    }                                                                     \
    else {                                                                \
      /* yl < 0: next down */                                             \
      yh.d = __yh__;                                                      \
      if (yh_neg) yh.l++; else yh.l--; /* Beware: fails for zero */       \
      __res__ = yh.d;                                                     \
    }                                                                     \
  }                                                                       \
}
|
267
|
+
|
268
|
+
|
269
|
+
/*
 * TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__): same test
 * as TEST_AND_RETURN_RZ, but the outcome is stored instead of returned.
 *
 *   __cond__ : lvalue set to 1 when |__yl__| > __eps__ * u53, else to 0
 *   __res__  : lvalue written only when __cond__ becomes 1; it receives
 *              the neighbour of __yh__ towards zero if __yl__ and __yh__
 *              have different sign bits, otherwise __yh__
 *
 * u53 is the power of two whose exponent field is one above that of
 * |__yh__|.  __yh__ may be evaluated twice; the one-ulp step fails for zero.
 *
 * The block opened right after the macro name is closed on the last line;
 * without that final brace every expansion unbalanced the caller's braces.
 */
#define TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__)      \
{                                                                         \
  db_number yh, yl, u53;                                                  \
  int yh_neg, yl_neg;                                                     \
  yh.d = __yh__;                                                          \
  yl.d = __yl__;                                                          \
  /* remember both signs, then keep only the magnitudes */                \
  yh_neg = (yh.i[HI] & 0x80000000);                                       \
  yl_neg = (yl.i[HI] & 0x80000000);                                       \
  yh.l = yh.l & ULL(7fffffffffffffff);                                    \
  yl.l = yl.l & ULL(7fffffffffffffff);                                    \
  u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);         \
  __cond__ = 0;                                                           \
  if(yl.d > __eps__ * u53.d){                                             \
    if(yl_neg!=yh_neg) {                                                  \
      yh.d = __yh__;                                                      \
      yh.l--;                          /* Beware: fails for zero */       \
      __res__ = yh.d ;                                                    \
      __cond__ = 1;                                                       \
    }                                                                     \
    else {                                                                \
      __res__ = __yh__;                                                   \
      __cond__ = 1;                                                       \
    }                                                                     \
  }                                                                       \
}
|
291
|
+
|
292
|
+
|
293
|
+
|
294
|
+
/* If the processor has a FMA, use it ! **/
|
295
|
+
|
296
|
+
/* All this probably works only with gcc.
|
297
|
+
See Markstein book for the case of HP's compiler */
|
298
|
+
|
299
|
+
#if defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__)
|
300
|
+
#define PROCESSOR_HAS_FMA 1
|
301
|
+
#undef PROCESSOR_HAS_FMA
|
302
|
+
#define FMA(a,b,c) /* r = a*b + c*/ \
|
303
|
+
({ \
|
304
|
+
double _a, _b,_c,_r; \
|
305
|
+
_a=a; _b=b;_c=c; \
|
306
|
+
__asm__ ("fmadd %0, %1, %2, %3\n ;;\n" \
|
307
|
+
: "=f"(_r) \
|
308
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
309
|
+
); \
|
310
|
+
_r; \
|
311
|
+
})
|
312
|
+
|
313
|
+
|
314
|
+
#define FMS(a,b,c) /* r = a*b - c*/ \
|
315
|
+
({ \
|
316
|
+
double _a, _b,_c,_r; \
|
317
|
+
_a=a; _b=b;_c=c; \
|
318
|
+
__asm__ ("fmsub %0, %1, %2, %3\n ;;\n" \
|
319
|
+
: "=f"(_r) \
|
320
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
321
|
+
); \
|
322
|
+
_r; \
|
323
|
+
})
|
324
|
+
|
325
|
+
#endif /* defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__) */
|
326
|
+
|
327
|
+
|
328
|
+
|
329
|
+
|
330
|
+
/* On the Itanium 1 / gcc3.2 we lose 10 cycles when using the FMA !?!
|
331
|
+
It probably breaks the scheduling algorithms somehow...
|
332
|
+
To test again with higher gcc versions
|
333
|
+
*/
|
334
|
+
|
335
|
+
#if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) && 0
|
336
|
+
#define PROCESSOR_HAS_FMA 1
|
337
|
+
#define FMA(a,b,c) /* r = a*b + c*/ \
|
338
|
+
({ \
|
339
|
+
double _a, _b,_c,_r; \
|
340
|
+
_a=a; _b=b;_c=c; \
|
341
|
+
__asm__ ("fma %0 = %1, %2, %3\n ;;\n" \
|
342
|
+
: "=f"(_r) \
|
343
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
344
|
+
); \
|
345
|
+
_r; \
|
346
|
+
})
|
347
|
+
|
348
|
+
|
349
|
+
#define FMS(a,b,c) /* r = a*b - c*/ \
|
350
|
+
({ \
|
351
|
+
double _a, _b, _c, _r; \
|
352
|
+
_a=a; _b=b;_c=c; \
|
353
|
+
__asm__ ("fms %0 = %1, %2, %3\n ;;\n" \
|
354
|
+
: "=f"(_r) \
|
355
|
+
: "f"(_a), "f"(_b), "f"(_c) \
|
356
|
+
); \
|
357
|
+
_r; \
|
358
|
+
})
|
359
|
+
#endif /* defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) && 0 */
|
360
|
+
|
361
|
+
|
362
|
+
|
363
|
+
|
364
|
+
#if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)
|
365
|
+
#define PROCESSOR_HAS_FMA 1
|
366
|
+
#if 0 /* Commented out because it shouldn't be there: There should be
|
367
|
+
a standard #include doing all this, but as of april 2005
|
368
|
+
it doesn't exist (say Intel people). Leave
|
369
|
+
it as documentation, though, until it is replaced by #include
|
370
|
+
*/
|
371
|
+
/* Table 1-17: legal floating-point precision completers (.pc) */
|
372
|
+
typedef enum {
|
373
|
+
_PC_S = 1 /* single .s */
|
374
|
+
,_PC_D = 2 /* double .d */
|
375
|
+
,_PC_NONE = 3 /* dynamic */
|
376
|
+
} _Asm_pc;
|
377
|
+
|
378
|
+
/* Table 1-22: legal getf/setf floating-point register access completers */
|
379
|
+
typedef enum {
|
380
|
+
_FR_S = 1 /* single form .s */
|
381
|
+
,_FR_D = 2 /* double form .d */
|
382
|
+
,_FR_EXP = 3 /* exponent form .exp */
|
383
|
+
,_FR_SIG = 4 /* significand form .sig */
|
384
|
+
} _Asm_fr_access;
|
385
|
+
|
386
|
+
/* Table 1-24: legal floating-point FPSR status field completers (.sf) */
|
387
|
+
typedef enum {
|
388
|
+
_SF0 = 0 /* FPSR status field 0 .s0 */
|
389
|
+
,_SF1 = 1 /* FPSR status field 1 .s1 */
|
390
|
+
,_SF2 = 2 /* FPSR status field 2 .s2 */
|
391
|
+
,_SF3 = 3 /* FPSR status field 3 .s3 */
|
392
|
+
} _Asm_sf;
|
393
|
+
#endif
|
394
|
+
|
395
|
+
/*
 * FMA(a,b,c) and FMS(a,b,c) map to the compiler's _Asm_fma / _Asm_fms
 * primitives; the literal 2 and 0 stand for _PC_D and _SF0 (see the
 * inline comments).
 *
 * Both macros expand to a plain expression, without a trailing semicolon,
 * so they can be used inside larger expressions and in front of `else`;
 * the caller supplies the statement terminator, as for a function call.
 */
#define FMA(a,b,c) /* r = a*b + c*/ \
 _Asm_fma( 2/*_PC_D*/, a, b, c, 0/*_SF0*/ )


#define FMS(a,b,c) /* r = a*b - c*/ \
 _Asm_fms( 2/*_PC_D*/, a, b, c, 0/*_SF0*/)
|
401
|
+
|
402
|
+
#endif /*defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)*/
|
403
|
+
|
404
|
+
|
405
|
+
|
406
|
+
|
407
|
+
|
408
|
+
|
409
|
+
|
410
|
+
|
411
|
+
#ifdef WORDS_BIGENDIAN
|
412
|
+
#define DB_ONE {{0x3ff00000, 0x00000000}}
|
413
|
+
#else
|
414
|
+
#define DB_ONE {{0x00000000 ,0x3ff00000}}
|
415
|
+
#endif
|
416
|
+
|
417
|
+
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
|
422
|
+
extern const scs scs_zer, scs_half, scs_one, scs_two, scs_sixinv;
|
423
|
+
|
424
|
+
|
425
|
+
#define SCS_ZERO (scs_ptr)(&scs_zer)
|
426
|
+
#define SCS_HALF (scs_ptr)(&scs_half)
|
427
|
+
#define SCS_ONE (scs_ptr)(&scs_one)
|
428
|
+
#define SCS_TWO (scs_ptr)(&scs_two)
|
429
|
+
#define SCS_SIXINV (scs_ptr)(&scs_sixinv)
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
|
434
|
+
|
435
|
+
/* ABS(x): x when x > 0, -x otherwise.  The argument is evaluated twice. */
#define ABS(x) ((x) > 0 ? (x) : -(x))
|
436
|
+
|
437
|
+
|
438
|
+
|
439
|
+
|
440
|
+
|
441
|
+
/*
|
442
|
+
* In the following, when an operator is preceded by a '@' it means that we
|
443
|
+
* are considering the IEEE-compliant machine operator, otherwise it
|
444
|
+
* is the mathematical operator.
|
445
|
+
*
|
446
|
+
*/
|
447
|
+
|
448
|
+
|
449
|
+
/*
 * Add12Cond(s, r, a, b): computes s and r such that s + r = a + b, with
 * s = a @+ b exactly.
 *
 * The operand of larger magnitude is picked at run time, so no ordering
 * of a and b is required.
 *
 *   s, r : double lvalues receiving the rounded sum and its error term
 *   a, b : double expressions; each is evaluated exactly once (they are
 *          copied into _a and _b, and the magnitude test uses the copies)
 */
#define Add12Cond(s, r, a, b)      \
{double _z, _a=(a), _b=(b);        \
 s = _a + _b;                      \
 if (ABS(_a) > ABS(_b)){           \
   _z = s - _a;                    \
   r = _b - _z;                    \
 }else {                           \
   _z = s - _b;                    \
   r = _a - _z;}}
|
461
|
+
|
462
|
+
/*
 * Add12(s, r, a, b): computes s and r such that s + r = a + b, with
 * s = a @+ b exactly, under the condition a >= b.
 * NOTE(review): the condition is presumably meant on magnitudes
 * (|a| >= |b|) -- confirm against callers.
 *
 *   s, r : double lvalues receiving the rounded sum and its error term
 *   a, b : double expressions, each evaluated once
 */
#define Add12(s, r, a, b)          \
{                                  \
  double _z, _a=a, _b=b;           \
  s  = _a + _b;                    \
  _z = s - _a;                     \
  r  = _b - _z;                    \
}
|
471
|
+
|
472
|
+
|
473
|
+
/*
 * Fast3Sum(r1, r2, r3, a, b, c): computes r1, r2, r3 such that
 * r1 + r2 + r3 = a + b + c exactly.
 *
 * Built from three Add12 steps (Add12 is this header's fast two-sum; the
 * name Fast2Sum previously used here is not defined in this header).
 * NOTE(review): Add12 is documented for a >= b, so the operands are
 * presumably expected in decreasing order of magnitude -- confirm.
 *
 *   r1, r2, r3 : double lvalues receiving the result
 *   a, b, c    : double expressions
 */
#define Fast3Sum(r1, r2, r3, a, b, c) \
{double u, v, w;                      \
 Add12(u, v, b, c);                   \
 Add12(r1, w, a, u);                  \
 Add12(r2, r3, w, v); }
|
481
|
+
|
482
|
+
|
483
|
+
|
484
|
+
|
485
|
+
|
486
|
+
|
487
|
+
|
488
|
+
/*
|
489
|
+
* Functions to compute double-double addition: zh+zl = (xh+xl) + (yh+yl)
|
490
|
+
* knowing that xh>yh
|
491
|
+
* relative error is smaller than 2^-103
|
492
|
+
*/
|
493
|
+
|
494
|
+
|
495
|
+
#if ADD22_AS_FUNCTIONS
|
496
|
+
extern void Add22(double *zh, double *zl, double xh, double xl, double yh, double yl);
|
497
|
+
extern void Add22Cond(double *zh, double *zl, double xh, double xl, double yh, double yl);
|
498
|
+
|
499
|
+
#else /* ADD22_AS_FUNCTIONS */
|
500
|
+
|
501
|
+
/*
 * Add22Cond(zh,zl,xh,xl,yh,yl): double-double addition,
 * *zh + *zl = (xh + xl) + (yh + yl), with a run-time test on the
 * magnitudes of xh and yh to pick the order of the compensation.
 *
 *   zh, zl         : pointers to the doubles receiving the result
 *   xh, xl, yh, yl : double expressions; each may be evaluated several
 *                    times, so they must be free of side effects
 */
#define Add22Cond(zh,zl,xh,xl,yh,yl)                        \
do {                                                        \
  double _r,_s;                                             \
  _r = (xh)+(yh);                                           \
  if ((ABS(xh)) > (ABS(yh)))                                \
    _s = ((xh)-_r+(yh)+(yl)+(xl));                          \
  else                                                      \
    _s = ((yh)-_r+(xh)+(xl)+(yl));                          \
  *zh = _r+_s;                                              \
  *zl = (_r - (*zh)) + _s;                                  \
} while(0)
|
509
|
+
|
510
|
+
|
511
|
+
|
512
|
+
/*
 * Add22(zh,zl,xh,xl,yh,yl): double-double addition,
 * *zh + *zl = (xh + xl) + (yh + yl), without any magnitude test.
 * NOTE(review): the comment heading this section says "knowing that
 * xh>yh"; presumably |xh| >= |yh| is required -- confirm.
 *
 *   zh, zl         : pointers to the doubles receiving the result
 *   xh, xl, yh, yl : double expressions; xh and yh are evaluated twice
 */
#define Add22(zh,zl,xh,xl,yh,yl)                            \
do {                                                        \
  double _r,_s;                                             \
  _r  = (xh)+(yh);                                          \
  /* keep this summation order: it is part of the algorithm */ \
  _s  = ((((xh)-_r) +(yh)) + (yl)) + (xl);                  \
  *zh = _r+_s;                                              \
  *zl = (_r - (*zh)) + _s;                                  \
} while(0)
|
520
|
+
|
521
|
+
#endif /* ADD22_AS_FUNCTIONS */
|
522
|
+
|
523
|
+
|
524
|
+
|
525
|
+
#ifdef PROCESSOR_HAS_FMA
|
526
|
+
/* One of the nice things with the fused multiply-and-add is that it
 greatly simplifies the double-double multiplications : */

/*
 * Mul12(rh,rl,u,v): *rh = u @* v and *rl = FMS(u, v, *rh), i.e. the
 * rounding error of the product obtained with one fused multiply-subtract.
 *
 *   rh, rl : pointers to the doubles receiving the result
 *   u, v   : double expressions; evaluated twice, and parenthesized so
 *            that an argument such as a+b is multiplied as a whole
 */
#define Mul12(rh,rl,u,v)                              \
{                                                     \
  *(rh) = (u)*(v);                                    \
  *(rl) = FMS((u),(v), *(rh));                        \
}

/*
 * Mul22(pzh,pzl, xh,xl, yh,yl): double-double product
 * *pzh + *pzl ~ (xh + xl) * (yh + yl); the xl*yl term is not computed.
 *
 *   pzh, pzl       : pointers to the doubles receiving the result
 *   xh, xl, yh, yl : double expressions; xh and yh are evaluated several
 *                    times, all are parenthesized before being multiplied
 */
#define Mul22(pzh,pzl, xh,xl, yh,yl)                  \
{                                                     \
  double ph, pl;                                      \
  ph = (xh)*(yh);                                     \
  pl = FMS((xh), (yh), ph);                           \
  pl = FMA((xh), (yl), pl);                           \
  pl = FMA((xl), (yh), pl);                           \
  *(pzh) = ph+pl;                                     \
  *(pzl) = ph - (*(pzh));                             \
  *(pzl) += pl;                                       \
}
|
545
|
+
|
546
|
+
|
547
|
+
/* besides we don't care anymore about overflows in the mult */
|
548
|
+
#define Mul12Cond Mul12
|
549
|
+
#define Mul22cond Mul22
|
550
|
+
|
551
|
+
|
552
|
+
#else /* ! PROCESSOR_HAS_FMA */
|
553
|
+
|
554
|
+
|
555
|
+
#if DEKKER_AS_FUNCTIONS
|
556
|
+
extern void Mul12(double *rh, double *rl, double u, double v);
|
557
|
+
extern void Mul12Cond(double *rh, double *rl, double a, double b);
|
558
|
+
extern void Mul22(double *zh, double *zl, double xh, double xl, double yh, double yl);
|
559
|
+
#else /* if DEKKER_AS_FUNCTIONS */
|
560
|
+
/*
 * Mul12(rh,rl,u,v): computes rh and rl such that rh + rl = u * v with
 * rh = u @* v exactly, under the conditions u < 2^970 and v < 2^970.
 *
 * Each operand is split into a high part (u1, v1) and a low part (u2, v2)
 * by means of the constant c = 2^27 + 1; the error term is then rebuilt
 * from the four partial products.
 *
 *   rh, rl : pointers to the doubles receiving the result
 *   u, v   : double expressions, each evaluated once
 */
#define Mul12(rh,rl,u,v)                                   \
{                                                          \
  const double c = 134217729.;   /* 2^27 +1 */             \
  double up, u1, u2, vp, v1, v2;                           \
  double _u =u, _v=v;                                      \
                                                           \
  /* split _u and _v */                                    \
  up = _u*c;                                               \
  vp = _v*c;                                               \
  u1 = (_u-up)+up;                                         \
  v1 = (_v-vp)+vp;                                         \
  u2 = _u-u1;                                              \
  v2 = _v-v1;                                              \
                                                           \
  /* rounded product, then its error */                    \
  *rh = _u*_v;                                             \
  *rl = (((u1*v1-*rh)+(u1*v2))+(u2*v1))+(u2*v2);           \
}
|
577
|
+
|
578
|
+
|
579
|
+
/*
 * Mul12Cond(rh, rl, a, b): computes rh and rl such that rh + rl = a * b
 * and rh = a @* b exactly.
 *
 * An operand whose high word is above 0x7C900000 (the high word of 2^970)
 * is first scaled by 2^-53 so that Mul12 can be applied, and the result is
 * scaled back by 2^53 afterwards, once per scaled operand.
 * NOTE(review): the test compares the HI word as stored in db_number; if
 * that word is signed, negative operands are never scaled -- confirm.
 *
 *   rh, rl : pointers to the doubles receiving the result
 *   a, b   : double expressions, each evaluated once
 *
 * The operands are stored through the .d member of a db_number so that
 * their high words can be inspected; a db_number cannot be initialized or
 * multiplied as if it were a double.
 */
#define Mul12Cond(rh, rl, a, b)                                                    \
{                                                                                  \
  const double two_em53 = 1.1102230246251565404e-16; /* 0x3CA00000, 0x00000000 */  \
  const double two_e53 = 9007199254740992.;          /* 0x43400000, 0x00000000 */  \
  double u, v;                                                                     \
  db_number _a, _b;                                                                \
  _a.d = (a);                                                                      \
  _b.d = (b);                                                                      \
                                                                                   \
  if (_a.i[HI]>0x7C900000) u = _a.d*two_em53;                                      \
  else u = _a.d;                                                                   \
  if (_b.i[HI]>0x7C900000) v = _b.d*two_em53;                                      \
  else v = _b.d;                                                                   \
                                                                                   \
  Mul12(rh, rl, u, v);                                                             \
                                                                                   \
  if (_a.i[HI]>0x7C900000) {*rh *= two_e53; *rl *= two_e53;}                       \
  if (_b.i[HI]>0x7C900000) {*rh *= two_e53; *rl *= two_e53;}                       \
}
|
599
|
+
|
600
|
+
|
601
|
+
|
602
|
+
/*
 * Mul22(zh,zl,xh,xl,yh,yl): double-double multiplication,
 * zh + zl = (xh + xl) * (yh + yl); relative error is smaller than 2^-102.
 *
 * The product xh*yh is computed exactly as mh + ml by splitting both
 * factors with c = 2^27 + 1; the cross terms xh*yl and xl*yh are then
 * added to ml, and the pair is renormalized.  The xl*yl term is not
 * computed.
 *
 *   zh, zl         : pointers to the doubles receiving the result
 *   xh, xl, yh, yl : double expressions; xh and yh are evaluated several
 *                    times, so they must be free of side effects
 */
#define Mul22(zh,zl,xh,xl,yh,yl)                           \
{                                                          \
  double mh, ml;                                           \
                                                           \
  const double c = 134217729.;                             \
  double up, u1, u2, vp, v1, v2;                           \
                                                           \
  /* split xh and yh */                                    \
  up = (xh)*c;                                             \
  vp = (yh)*c;                                             \
  u1 = ((xh)-up)+up;                                       \
  v1 = ((yh)-vp)+vp;                                       \
  u2 = (xh)-u1;                                            \
  v2 = (yh)-v1;                                            \
                                                           \
  /* exact product of the high parts */                    \
  mh = (xh)*(yh);                                          \
  ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2);             \
                                                           \
  /* cross terms, then renormalization */                  \
  ml += (xh)*(yl) + (xl)*(yh);                             \
  *zh = mh+ml;                                             \
  *zl = mh - (*zh) + ml;                                   \
}
|
627
|
+
|
628
|
+
|
629
|
+
|
630
|
+
#endif /* DEKKER_AS_FUNCTIONS */
|
631
|
+
|
632
|
+
#endif /* PROCESSOR_HAS_FMA */
|
633
|
+
|
634
|
+
|
635
|
+
|
636
|
+
/* In the following the one-line computation of _cl was split so that
|
637
|
+
icc(8.1) would compile it properly. It's a bug of icc */
|
638
|
+
|
639
|
+
#if DEKKER_AS_FUNCTIONS
|
640
|
+
extern void Div22(double *z, double *zz, double x, double xx, double y, double yy);
|
641
|
+
#else
|
642
|
+
/*
 * Div22(pzh,pzl,xh,xl,yh,yl): double-double division,
 * *pzh + *pzl ~ (xh + xl) / (yh + yl).
 *
 * _ch is the rounded quotient xh/yh; _ch*yh is recomputed exactly as
 * _uh + _ul with Mul12, and the correction _cl is accumulated one
 * operation per statement (the single-expression form is deliberately
 * avoided, see the note above about icc 8.1).
 *
 *   pzh, pzl       : pointers to the doubles receiving the result
 *   xh, xl, yh, yl : double expressions; xh and yh are evaluated several
 *                    times, so they must be free of side effects
 */
#define Div22(pzh,pzl,xh,xl,yh,yl) {                       \
  double _ch,_cl,_uh,_ul;                                  \
  _ch = (xh)/(yh);                                         \
  Mul12(&_uh,&_ul,_ch,(yh));                               \
  _cl  = ((xh)-_uh);                                       \
  _cl -= _ul;                                              \
  _cl += (xl);                                             \
  _cl -= _ch*(yl);                                         \
  _cl /= (yh);                                             \
  *pzh = _ch+_cl;                                          \
  *pzl = (_ch-(*pzh))+_cl;                                 \
}
|
652
|
+
#endif /* DEKKER_AS_FUNCTIONS */
|
653
|
+
|
654
|
+
|
655
|
+
|
656
|
+
|
657
|
+
|
658
|
+
#endif /*CRLIBM_PRIVATE_H*/
|