intervals 0.3.56

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,658 @@
1
+ /*
2
+ * crlibm_private.h
3
+ *
4
+ * This file contains useful tools and data for the crlibm functions.
5
+ *
6
+ */
7
+
8
+ #ifndef CRLIBM_PRIVATE_H
9
+ #define CRLIBM_PRIVATE_H 1
10
+
11
+ #include "scs_lib/scs.h"
12
+ #include "scs_lib/scs_private.h"
13
+
14
+ #ifdef HAVE_CONFIG_H
15
+ #include "crlibm_config.h"
16
+ #endif
17
+ /* otherwise CMake is used, and defines all the useful variables using -D switch */
18
+
19
+ #ifdef HAVE_INTTYPES_H
20
+ #include <inttypes.h>
21
+ #endif
22
+
23
+
24
+
25
+ #if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
26
+ # ifndef CRLIBM_TYPEOS_BSD
27
+ # include <fpu_control.h>
28
+ # ifndef _FPU_SETCW
29
+ # define _FPU_SETCW(cw) __asm__ ("fldcw %0" : : "m" (*&cw))
30
+ # endif
31
+ # ifndef _FPU_GETCW
32
+ # define _FPU_GETCW(cw) __asm__ ("fnstcw %0" : "=m" (*&cw))
33
+ # endif
34
+ # endif
35
+ #endif
36
+
37
+ /* 64 bit arithmetic may be standardised, but people still do what they want */
38
+ #ifdef HAVE_INTTYPES_H
39
+ #define ULL(bits) 0x##bits##uLL
40
+ #elif defined(_WIN32)
41
+ /* Windows garbage there */
42
+ typedef long long int int64_t;
43
+ typedef unsigned long long int uint64_t;
44
+ #define ULL(bits) 0x##bits##i64
45
+ /* Default, hoping it works, hopefully less and less relevant */
46
+ #else
47
+ typedef long long int int64_t;
48
+ typedef unsigned long long int uint64_t;
49
+ #define ULL(bits) 0x##bits##uLL
50
+ #endif
51
+
52
+ #ifndef SCS_DEF_INT64
53
+ #define SCS_DEF_INT64
54
+ #ifdef CRLIBM_TYPEOS_HPUX
55
+ #ifndef __LP64__ /* To solve the problem with 64 bits integer on HPPA */
56
+ typedef long long int64_t;
57
+ typedef unsigned long long uint64_t;
58
+ #define ULL(bits) 0x##bits##uLL
59
+ #endif
60
+ #endif
61
+ #endif
62
+
63
+
64
+
65
+
66
+ /* The Add22 and Add22Cond functions, as well as double-double
67
+ multiplications of the Dekker family may be either defined as
68
+ functions, or as #defines. Which one is better depends on the
69
+ processor/compiler/OS. As #define has to be used with more care (not
70
+ type-safe), the two following variables should be set to 1 in the
71
+ development/debugging phase, until no type warning remains.
72
+
73
+ */
74
+
75
+ #define ADD22_AS_FUNCTIONS 0
76
+ #define DEKKER_AS_FUNCTIONS 0
77
+
78
+
79
+
80
+ /* setting the following variable adds variables and code for
81
+ monitoring the performance.
82
+ Note that sometimes only round to nearest is instrumented */
83
+ #define EVAL_PERF 1
84
+
85
+
86
+ #if EVAL_PERF==1
87
+ /* counter of calls to the second step (accurate step) */
88
+ extern int crlibm_second_step_taken;
89
+ #endif
90
+
91
+
92
+
93
+ /* The prototypes of the second steps */
94
+ extern void exp_SC(scs_ptr res_scs, double x);
95
+ extern double scs_exp_rn(double);
96
+ extern double scs_exp_ru(double);
97
+ extern double scs_exp_rd(double);
98
+
99
+ extern void scs_log(scs_ptr,db_number, int);
100
+
101
+ extern double scs_atan_rn(double);
102
+ extern double scs_atan_rd(double);
103
+ extern double scs_atan_ru(double);
104
+
105
+ extern double scs_sin_rn(double);
106
+ extern double scs_sin_ru(double);
107
+ extern double scs_sin_rd(double);
108
+ extern double scs_sin_rz(double);
109
+ extern double scs_cos_rn(double);
110
+ extern double scs_cos_ru(double);
111
+ extern double scs_cos_rd(double);
112
+ extern double scs_cos_rz(double);
113
+ extern double scs_tan_rn(double);
114
+ extern double scs_tan_rd(double);
115
+ extern double scs_tan_ru(double);
116
+ extern double scs_tan_rz(double);
117
+
118
+ extern int rem_pio2_scs(scs_ptr, scs_ptr);
119
+
120
/*
 * DOUBLE2INT(_i, _d)
 *
 * Stores in _i the integer nearest to the double _d (rounding is performed
 * by the floating-point addition below, so it follows the current rounding
 * mode of the FPU).
 *
 * The constant added is 2^52 + 2^51 = 6755399441055744.0: after the
 * addition, the low 32-bit word of the sum (_t.i[LO]) is read back as the
 * integer result.
 *
 * _i : integer lvalue receiving the result.
 * _d : double expression; it is parenthesized in the expansion so that any
 *      expression (e.g. a conditional expression) can be passed safely.
 */
#define DOUBLE2INT(_i, _d)                    \
  {db_number _t;                              \
   _t.d = ((_d)+6755399441055744.0);          \
   _i = _t.i[LO];}
128
+
129
+
130
/*
 * DOUBLE2LONGINT(_i, _d)
 *
 * Same idea as DOUBLE2INT, but for a 64-bit integer result: 2^52 + 2^51 is
 * added to _d and the integer is extracted from the low bits of the sum.
 * Beware: works only for |_i| < 2^51 - 1.
 *
 * After the addition, the 52-bit mantissa field of the sum holds 2^51 + i,
 * where i is the rounded integer:
 *   - i >= 0 : bit 51 is set and the low 51 bits are i itself;
 *   - i <  0 : bit 51 is clear and the low 51 bits are i modulo 2^51, so
 *              the upper 13 bits must be set to sign-extend to 64 bits.
 * The sign decision is taken on bit 51 of the sum rather than on the sign
 * of _d, so that a small negative _d which rounds to 0 gives 0 (and not
 * -2^51).
 *
 * _i : 64-bit integer lvalue receiving the result.
 * _d : double expression, evaluated exactly once.
 */
#define DOUBLE2LONGINT(_i, _d)                                           \
  {                                                                      \
    db_number _t;                                                        \
    _t.d = ((_d)+6755399441055744.0);                                    \
    if (_t.l & ULL(0008000000000000))  /* rounded value is >= 0 */       \
      _i = _t.l & ULL(0007FFFFFFFFFFFF);                                 \
    else                               /* negative: sign extend */       \
      _i = (_t.l & ULL(0007FFFFFFFFFFFF)) | (ULL(FFF8000000000000));     \
  }
140
+
141
+
142
+
143
+
144
+
145
+ /* Macros for the rounding tests in directed modes */
146
+ /* After Evgeny Gvozdev pointed out a bug in the rounding procedures I
147
+ decided to centralize them here
148
+
149
+ Note that these tests launch the accurate phase when yl=0, in
150
+ particular in the exceptional cases when the image of a double is a
151
+ double. See the chapter about the log for an example
152
+
153
+ */
154
+
155
+
156
+ #define TEST_AND_RETURN_RU(__yh__, __yl__, __eps__) \
157
+ { \
158
+ db_number yh, yl, u53; int yh_neg, yl_neg; \
159
+ yh.d = __yh__; yl.d = __yl__; \
160
+ yh_neg = (yh.i[HI] & 0x80000000); \
161
+ yl_neg = (yl.i[HI] & 0x80000000); \
162
+ yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
163
+ yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
164
+ u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
165
+ if(yl.d > __eps__ * u53.d){ \
166
+ if(!yl_neg) { /* The case yl==0 is filtered by the above test*/ \
167
+ /* return next up */ \
168
+ yh.d = __yh__; \
169
+ if(yh_neg) yh.l--; else yh.l++; /* Beware: fails for zero */ \
170
+ return yh.d ; \
171
+ } \
172
+ else return __yh__; \
173
+ } \
174
+ }
175
+
176
+
177
+ #define TEST_AND_RETURN_RD(__yh__, __yl__, __eps__) \
178
+ { \
179
+ db_number yh, yl, u53; int yh_neg, yl_neg; \
180
+ yh.d = __yh__; yl.d = __yl__; \
181
+ yh_neg = (yh.i[HI] & 0x80000000); \
182
+ yl_neg = (yl.i[HI] & 0x80000000); \
183
+ yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
184
+ yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
185
+ u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
186
+ if(yl.d > __eps__ * u53.d){ \
187
+ if(yl_neg) { /* The case yl==0 is filtered by the above test*/ \
188
+ /* return next down */ \
189
+ yh.d = __yh__; \
190
+ if(yh_neg) yh.l++; else yh.l--; /* Beware: fails for zero */ \
191
+ return yh.d ; \
192
+ } \
193
+ else return __yh__; \
194
+ } \
195
+ }
196
+
197
+
198
+
199
+ #define TEST_AND_RETURN_RZ(__yh__, __yl__, __eps__) \
200
+ { \
201
+ db_number yh, yl, u53; int yh_neg, yl_neg; \
202
+ yh.d = __yh__; yl.d = __yl__; \
203
+ yh_neg = (yh.i[HI] & 0x80000000); \
204
+ yl_neg = (yl.i[HI] & 0x80000000); \
205
+ yh.l = yh.l & ULL(7fffffffffffffff); /* compute the absolute value*/\
206
+ yl.l = yl.l & ULL(7fffffffffffffff); /* compute the absolute value*/\
207
+ u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
208
+ if(yl.d > __eps__ * u53.d){ \
209
+ if(yl_neg!=yh_neg) { \
210
+ yh.d = __yh__; \
211
+ yh.l--; /* Beware: fails for zero */ \
212
+ return yh.d ; \
213
+ } \
214
+ else return __yh__; \
215
+ } \
216
+ }
217
+
218
+
219
+
220
+ #define TEST_AND_COPY_RU(__cond__, __res__, __yh__, __yl__, __eps__) \
221
+ { \
222
+ db_number yh, yl, u53; int yh_neg, yl_neg; \
223
+ yh.d = __yh__; yl.d = __yl__; \
224
+ yh_neg = (yh.i[HI] & 0x80000000); \
225
+ yl_neg = (yl.i[HI] & 0x80000000); \
226
+ yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
227
+ yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
228
+ u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
229
+ __cond__ = 0; \
230
+ if(yl.d > __eps__ * u53.d){ \
231
+ __cond__ = 1; \
232
+ if(!yl_neg) { /* The case yl==0 is filtered by the above test*/ \
233
+ /* return next up */ \
234
+ yh.d = __yh__; \
235
+ if(yh_neg) yh.l--; else yh.l++; /* Beware: fails for zero */ \
236
+ __res__ = yh.d ; \
237
+ } \
238
+ else { \
239
+ __res__ = __yh__; \
240
+ } \
241
+ } \
242
+ }
243
+
244
+ #define TEST_AND_COPY_RD(__cond__, __res__, __yh__, __yl__, __eps__) \
245
+ { \
246
+ db_number yh, yl, u53; int yh_neg, yl_neg; \
247
+ yh.d = __yh__; yl.d = __yl__; \
248
+ yh_neg = (yh.i[HI] & 0x80000000); \
249
+ yl_neg = (yl.i[HI] & 0x80000000); \
250
+ yh.l = yh.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
251
+ yl.l = yl.l & 0x7fffffffffffffffLL; /* compute the absolute value*/ \
252
+ u53.l = (yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000); \
253
+ __cond__ = 0; \
254
+ if(yl.d > __eps__ * u53.d){ \
255
+ __cond__ = 1; \
256
+ if(yl_neg) { /* The case yl==0 is filtered by the above test*/ \
257
+ /* return next down */ \
258
+ yh.d = __yh__; \
259
+ if(yh_neg) yh.l++; else yh.l--; /* Beware: fails for zero */ \
260
+ __res__ = yh.d ; \
261
+ } \
262
+ else { \
263
+ __res__ = __yh__; \
264
+ } \
265
+ } \
266
+ }
267
+
268
+
269
/*
 * TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__)
 *
 * Rounding test for round-toward-zero, in the "copy" flavour: instead of
 * returning from the enclosing function, it reports through __cond__
 * whether the test succeeded and stores the rounded value in __res__.
 *
 * __yh__, __yl__ : high and low parts of the approximation (doubles).
 * __eps__        : error bound used by the test.
 * __cond__       : integer lvalue, set to 1 when the rounding could be
 *                  decided and __res__ was written, 0 otherwise.
 * __res__        : double lvalue receiving the result when __cond__ is 1.
 *
 * The test succeeds when |yl| > eps * u53, where u53 is the power of two
 * whose exponent field is one above that of |yh|.  When it succeeds:
 *   - if yl and yh have different signs, the result is the double just
 *     below yh in magnitude (bit pattern of yh minus one);
 *   - otherwise the result is yh itself.
 *
 * The macro body is a single brace-enclosed block; all opened braces are
 * closed inside the macro.
 */
#define TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__)    \
  {                                                                     \
    db_number yh, yl, u53;  int yh_neg, yl_neg;                         \
    yh.d = (__yh__);    yl.d = (__yl__);                                \
    yh_neg = (yh.i[HI] & 0x80000000);                                   \
    yl_neg = (yl.i[HI] & 0x80000000);                                   \
    yh.l = yh.l & ULL(7fffffffffffffff);  /* compute the absolute value*/\
    yl.l = yl.l & ULL(7fffffffffffffff);  /* compute the absolute value*/\
    u53.l = (yh.l & ULL(7ff0000000000000)) +  ULL(0010000000000000);    \
    __cond__ = 0;                                                       \
    if(yl.d > (__eps__) * u53.d){                                       \
      __cond__ = 1;                                                     \
      if(yl_neg!=yh_neg) {                                              \
        yh.d = (__yh__);                                                \
        yh.l--;                       /* Beware: fails for zero */      \
        __res__ = yh.d ;                                                \
      }                                                                 \
      else {                                                            \
        __res__ = (__yh__);                                             \
      }                                                                 \
    }                                                                   \
  }
291
+
292
+
293
+
294
+ /* If the processor has an FMA, use it! */
295
+
296
+ /* All this probably works only with gcc.
297
+ See Markstein book for the case of HP's compiler */
298
+
299
+ #if defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__)
300
+ #define PROCESSOR_HAS_FMA 1
301
+ #undef PROCESSOR_HAS_FMA
302
+ #define FMA(a,b,c) /* r = a*b + c*/ \
303
+ ({ \
304
+ double _a, _b,_c,_r; \
305
+ _a=a; _b=b;_c=c; \
306
+ __asm__ ("fmadd %0, %1, %2, %3\n ;;\n" \
307
+ : "=f"(_r) \
308
+ : "f"(_a), "f"(_b), "f"(_c) \
309
+ ); \
310
+ _r; \
311
+ })
312
+
313
+
314
+ #define FMS(a,b,c) /* r = a*b - c*/ \
315
+ ({ \
316
+ double _a, _b,_c,_r; \
317
+ _a=a; _b=b;_c=c; \
318
+ __asm__ ("fmsub %0, %1, %2, %3\n ;;\n" \
319
+ : "=f"(_r) \
320
+ : "f"(_a), "f"(_b), "f"(_c) \
321
+ ); \
322
+ _r; \
323
+ })
324
+
325
+ #endif /* defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__) */
326
+
327
+
328
+
329
+
330
+ /* On the Itanium 1 / gcc3.2 we lose 10 cycles when using the FMA !?!
331
+ It probably breaks the scheduling algorithms somehow...
332
+ To test again with higher gcc versions
333
+ */
334
+
335
+ #if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) && 0
336
+ #define PROCESSOR_HAS_FMA 1
337
+ #define FMA(a,b,c) /* r = a*b + c*/ \
338
+ ({ \
339
+ double _a, _b,_c,_r; \
340
+ _a=a; _b=b;_c=c; \
341
+ __asm__ ("fma %0 = %1, %2, %3\n ;;\n" \
342
+ : "=f"(_r) \
343
+ : "f"(_a), "f"(_b), "f"(_c) \
344
+ ); \
345
+ _r; \
346
+ })
347
+
348
+
349
+ #define FMS(a,b,c) /* r = a*b - c*/ \
350
+ ({ \
351
+ double _a, _b, _c, _r; \
352
+ _a=a; _b=b;_c=c; \
353
+ __asm__ ("fms %0 = %1, %2, %3\n ;;\n" \
354
+ : "=f"(_r) \
355
+ : "f"(_a), "f"(_b), "f"(_c) \
356
+ ); \
357
+ _r; \
358
+ })
359
+ #endif /* defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) */
360
+
361
+
362
+
363
+
364
+ #if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)
365
+ #define PROCESSOR_HAS_FMA 1
366
+ #if 0 /* Commented out because it shouldn't be there: There should be
367
+ a standard #include doing all this, but as of april 2005
368
+ it doesn't exist (say Intel people). Leave
369
+ it as documentation, though, until it is replaced by #include
370
+ */
371
+ /* Table 1-17: legal floating-point precision completers (.pc) */
372
+ typedef enum {
373
+ _PC_S = 1 /* single .s */
374
+ ,_PC_D = 2 /* double .d */
375
+ ,_PC_NONE = 3 /* dynamic */
376
+ } _Asm_pc;
377
+
378
+ /* Table 1-22: legal getf/setf floating-point register access completers */
379
+ typedef enum {
380
+ _FR_S = 1 /* single form .s */
381
+ ,_FR_D = 2 /* double form .d */
382
+ ,_FR_EXP = 3 /* exponent form .exp */
383
+ ,_FR_SIG = 4 /* significand form .sig */
384
+ } _Asm_fr_access;
385
+
386
+ /* Table 1-24: legal floating-point FPSR status field completers (.sf) */
387
+ typedef enum {
388
+ _SF0 = 0 /* FPSR status field 0 .s0 */
389
+ ,_SF1 = 1 /* FPSR status field 1 .s1 */
390
+ ,_SF2 = 2 /* FPSR status field 2 .s2 */
391
+ ,_SF3 = 3 /* FPSR status field 3 .s3 */
392
+ } _Asm_sf;
393
+ #endif
394
+
395
/*
 * Fused multiply-add / multiply-subtract for the Intel compiler on Itanium,
 * expressed through _Asm_fma / _Asm_fms (presumably compiler intrinsics --
 * they are not declared in this file).
 *
 *   FMA(a,b,c) : a*b + c
 *   FMS(a,b,c) : a*b - c
 *
 * The literal 2 is the precision completer _PC_D (double) and the literal 0
 * is the status field _SF0.
 *
 * The expansions deliberately carry no trailing semicolon, so that the
 * macros can be used inside larger expressions as well as in statements
 * such as "x = FMA(a,b,c);".
 */
#define FMA(a,b,c)  /* r = a*b + c*/                   \
   _Asm_fma( 2/*_PC_D*/, a, b, c, 0/*_SF0*/ )


#define FMS(a,b,c)  /* r = a*b - c*/                   \
   _Asm_fms( 2/*_PC_D*/, a, b, c, 0/*_SF0*/)
401
+
402
+ #endif /*defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)*/
403
+
404
+
405
+
406
+
407
+
408
+
409
+
410
+
411
+ #ifdef WORDS_BIGENDIAN
412
+ #define DB_ONE {{0x3ff00000, 0x00000000}}
413
+ #else
414
+ #define DB_ONE {{0x00000000 ,0x3ff00000}}
415
+ #endif
416
+
417
+
418
+
419
+
420
+
421
+
422
+ extern const scs scs_zer, scs_half, scs_one, scs_two, scs_sixinv;
423
+
424
+
425
+ #define SCS_ZERO (scs_ptr)(&scs_zer)
426
+ #define SCS_HALF (scs_ptr)(&scs_half)
427
+ #define SCS_ONE (scs_ptr)(&scs_one)
428
+ #define SCS_TWO (scs_ptr)(&scs_two)
429
+ #define SCS_SIXINV (scs_ptr)(&scs_sixinv)
430
+
431
+
432
+
433
+
434
+
435
+ #define ABS(x) (((x)>0) ? (x) : (-(x)))
436
+
437
+
438
+
439
+
440
+
441
+ /*
442
+ * In the following, when an operator is preceded by a '@' it means that we
443
+ * are considering the IEEE-compliant machine operator, otherwise it
444
+ * is the mathematical operator.
445
+ *
446
+ */
447
+
448
+
449
/*
 * Add12Cond(s, r, a, b)
 *
 * computes s and r such that s + r = a + b, with s = a @+ b exactly.
 * No ordering of a and b is assumed: the magnitudes are compared and the
 * correction term r is computed from whichever operand is the larger one.
 *
 * s, r : double lvalues receiving the sum and the rounding error.
 * a, b : double expressions.  Each is evaluated exactly once (they are
 *        copied into _a and _b, and only the copies are used afterwards),
 *        so arguments with side effects are safe.
 */
#define Add12Cond(s, r, a, b)      \
  {double _z, _a=(a), _b=(b);      \
   s = _a + _b;                    \
   if (ABS(_a) > ABS(_b)){         \
     _z = s - _a;                  \
     r = _b - _z;                  \
   }else {                         \
     _z = s - _b;                  \
     r = _a - _z;}}
461
+
462
+ /*
463
+ * computes s and r such that s + r = a + b, with s = a @+ b exactly
464
+ * under the condition |a| >= |b|
465
+ */
466
+ #define Add12(s, r, a, b) \
467
+ {double _z, _a=a, _b=b; \
468
+ s = _a + _b; \
469
+ _z = s - _a; \
470
+ r = _b - _z; }
471
+
472
+
473
+ /*
474
+ * computes r1, r2, r3 such that r1 + r2 + r3 = a + b + c exactly
475
+ */
476
+ #define Fast3Sum(r1, r2, r3, a, b, c) \
477
+ {double u, v, w; \
478
+ Fast2Sum(u, v, b, c); \
479
+ Fast2Sum(r1, w, a, u); \
480
+ Fast2Sum(r2, r3, w, v); }
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+ /*
489
+ * Functions to compute double-double addition: zh+zl = xh+xl + yh+yl
490
+ * knowing that |xh| > |yh|
491
+ * relative error is smaller than 2^-103
492
+ */
493
+
494
+
495
+ #if ADD22_AS_FUNCTIONS
496
+ extern void Add22(double *zh, double *zl, double xh, double xl, double yh, double yl);
497
+ extern void Add22Cond(double *zh, double *zl, double xh, double xl, double yh, double yl);
498
+
499
+ #else /* ADD22_AS_FUNCTIONS */
500
+
501
+ #define Add22Cond(zh,zl,xh,xl,yh,yl) \
502
+ do { \
503
+ double _r,_s; \
504
+ _r = (xh)+(yh); \
505
+ _s = ((ABS(xh)) > (ABS(yh)))? ((xh)-_r+(yh)+(yl)+(xl)) : ((yh)-_r+(xh)+(xl)+(yl)); \
506
+ *zh = _r+_s; \
507
+ *zl = (_r - (*zh)) + _s; \
508
+ } while(2+2==5)
509
+
510
+
511
+
512
+ #define Add22(zh,zl,xh,xl,yh,yl) \
513
+ do { \
514
+ double _r,_s; \
515
+ _r = (xh)+(yh); \
516
+ _s = ((((xh)-_r) +(yh)) + (yl)) + (xl); \
517
+ *zh = _r+_s; \
518
+ *zl = (_r - (*zh)) + _s; \
519
+ } while(0)
520
+
521
+ #endif /* ADD22_AS_FUNCTIONS */
522
+
523
+
524
+
525
+ #ifdef PROCESSOR_HAS_FMA
526
+ /* One of the nice things with the fused multiply-and-add is that it
527
+ greatly simplifies the double-double multiplications : */
528
+ #define Mul12(rh,rl,u,v) \
529
+ { \
530
+ *rh = u*v; \
531
+ *rl = FMS(u,v, *rh); \
532
+ }
533
+
534
+ #define Mul22(pzh,pzl, xh,xl, yh,yl) \
535
+ { \
536
+ double ph, pl; \
537
+ ph = xh*yh; \
538
+ pl = FMS(xh, yh, ph); \
539
+ pl = FMA(xh,yl, pl); \
540
+ pl = FMA(xl,yh,pl); \
541
+ *pzh = ph+pl; \
542
+ *pzl = ph - (*pzh); \
543
+ *pzl += pl; \
544
+ }
545
+
546
+
547
+ /* besides we don't care anymore about overflows in the mult */
548
+ #define Mul12Cond Mul12
549
+ #define Mul22cond Mul22
550
+
551
+
552
+ #else /* ! PROCESSOR_HAS_FMA */
553
+
554
+
555
+ #if DEKKER_AS_FUNCTIONS
556
+ extern void Mul12(double *rh, double *rl, double u, double v);
557
+ extern void Mul12Cond(double *rh, double *rl, double a, double b);
558
+ extern void Mul22(double *zh, double *zl, double xh, double xl, double yh, double yl);
559
+ #else /* if DEKKER_AS_FUNCTIONS */
560
+ /*
561
+ * computes rh and rl such that rh + rl = a * b with rh = a @* b exactly
562
+ * under the conditions : a < 2^970 and b < 2^970
563
+ */
564
+ #define Mul12(rh,rl,u,v) \
565
+ { \
566
+ const double c = 134217729.; /* 2^27 +1 */ \
567
+ double up, u1, u2, vp, v1, v2; \
568
+ double _u =u, _v=v; \
569
+ \
570
+ up = _u*c; vp = _v*c; \
571
+ u1 = (_u-up)+up; v1 = (_v-vp)+vp; \
572
+ u2 = _u-u1; v2 = _v-v1; \
573
+ \
574
+ *rh = _u*_v; \
575
+ *rl = (((u1*v1-*rh)+(u1*v2))+(u2*v1))+(u2*v2);\
576
+ }
577
+
578
+
579
/*
 * Mul12Cond(rh, rl, a, b)
 *
 * Computes *rh and *rl such that *rh + *rl = a * b and *rh = a @* b exactly.
 *
 * rh, rl : pointers to double receiving the product and its error term.
 * a, b   : double expressions, each evaluated exactly once.
 *
 * Mul12 is only valid for operands below 2^970 in magnitude.  An operand
 * whose high 32-bit word, with the sign bit masked off, exceeds 0x7C900000
 * (exponent field of 2^970) is therefore multiplied by 2^-53 before Mul12
 * is called, and both results are multiplied back by 2^53 afterwards, once
 * for each operand that was scaled down.
 *
 * The operands are stored through the .d member of a db_number so that
 * their high word can be inspected; the arithmetic itself is done on the
 * double values.
 */
#define Mul12Cond(rh, rl, a,  b)                                             \
  {                                                                          \
    const double two_em53 = 1.1102230246251565404e-16; /* 2^-53: 0x3CA00000, 0x00000000 */\
    const double two_e53  = 9007199254740992.;         /* 2^53:  0x43400000, 0x00000000 */\
    double u, v;                                                             \
    db_number _a, _b;                                                        \
    int _a_big, _b_big;                                                      \
                                                                             \
    _a.d = (a);   _b.d = (b);                                                \
    /* mask the sign bit so that large negative operands are caught too */   \
    _a_big = ((_a.i[HI] & 0x7fffffff) > 0x7C900000);                         \
    _b_big = ((_b.i[HI] & 0x7fffffff) > 0x7C900000);                         \
                                                                             \
    if (_a_big) u = _a.d*two_em53;                                           \
    else        u = _a.d;                                                    \
    if (_b_big) v = _b.d*two_em53;                                           \
    else        v = _b.d;                                                    \
                                                                             \
    Mul12(rh, rl, u, v);                                                     \
                                                                             \
    if (_a_big) {*rh *= two_e53; *rl *= two_e53;}                            \
    if (_b_big) {*rh *= two_e53; *rl *= two_e53;}                            \
  }
599
+
600
+
601
+
602
+ /*
603
+ * computes double-double multiplication: zh+zl = (xh+xl) * (yh+yl)
604
+ * relative error is smaller than 2^-102
605
+ */
606
+
607
+
608
+
609
+ #define Mul22(zh,zl,xh,xl,yh,yl) \
610
+ { \
611
+ double mh, ml; \
612
+ \
613
+ const double c = 134217729.; \
614
+ double up, u1, u2, vp, v1, v2; \
615
+ \
616
+ up = (xh)*c; vp = (yh)*c; \
617
+ u1 = ((xh)-up)+up; v1 = ((yh)-vp)+vp; \
618
+ u2 = (xh)-u1; v2 = (yh)-v1; \
619
+ \
620
+ mh = (xh)*(yh); \
621
+ ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2); \
622
+ \
623
+ ml += (xh)*(yl) + (xl)*(yh); \
624
+ *zh = mh+ml; \
625
+ *zl = mh - (*zh) + ml; \
626
+ }
627
+
628
+
629
+
630
+ #endif /* DEKKER_AS_FUNCTIONS */
631
+
632
+ #endif /* PROCESSOR_HAS_FMA */
633
+
634
+
635
+
636
+ /* In the following the one-line computation of _cl was split so that
637
+ icc(8.1) would compile it properly. It's a bug of icc */
638
+
639
+ #if DEKKER_AS_FUNCTIONS
640
+ extern void Div22(double *z, double *zz, double x, double xx, double y, double yy);
641
+ #else
642
+ #define Div22(pzh,pzl,xh,xl,yh,yl) { \
643
+ double _ch,_cl,_uh,_ul; \
644
+ _ch=(xh)/(yh); Mul12(&_uh,&_ul,_ch,(yh)); \
645
+ _cl=((xh)-_uh); \
646
+ _cl -= _ul; \
647
+ _cl += (xl); \
648
+ _cl -= _ch*(yl); \
649
+ _cl /= (yh); \
650
+ *pzh=_ch+_cl; *pzl=(_ch-(*pzh))+_cl; \
651
+ }
652
+ #endif /* DEKKER_AS_FUNCTIONS */
653
+
654
+
655
+
656
+
657
+
658
+ #endif /*CRLIBM_PRIVATE_H*/