intervals 0.3.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. data/VERSION.txt +1 -0
  2. data/ext/crlibm/AUTHORS +2 -0
  3. data/ext/crlibm/COPYING +504 -0
  4. data/ext/crlibm/ChangeLog +80 -0
  5. data/ext/crlibm/INSTALL +182 -0
  6. data/ext/crlibm/Makefile.am +84 -0
  7. data/ext/crlibm/Makefile.in +530 -0
  8. data/ext/crlibm/NEWS +0 -0
  9. data/ext/crlibm/README +31 -0
  10. data/ext/crlibm/TODO +47 -0
  11. data/ext/crlibm/VERSION +1 -0
  12. data/ext/crlibm/aclocal.m4 +989 -0
  13. data/ext/crlibm/atan-itanium.c +846 -0
  14. data/ext/crlibm/atan-pentium.c +261 -0
  15. data/ext/crlibm/atan_accurate.c +244 -0
  16. data/ext/crlibm/atan_accurate.h +191 -0
  17. data/ext/crlibm/atan_fast.c +324 -0
  18. data/ext/crlibm/atan_fast.h +678 -0
  19. data/ext/crlibm/config.guess +1461 -0
  20. data/ext/crlibm/config.sub +1566 -0
  21. data/ext/crlibm/configure +7517 -0
  22. data/ext/crlibm/configure.ac +364 -0
  23. data/ext/crlibm/crlibm.h +125 -0
  24. data/ext/crlibm/crlibm_config.h +149 -0
  25. data/ext/crlibm/crlibm_config.h.in +148 -0
  26. data/ext/crlibm/crlibm_private.c +293 -0
  27. data/ext/crlibm/crlibm_private.h +658 -0
  28. data/ext/crlibm/csh_fast.c +631 -0
  29. data/ext/crlibm/csh_fast.h +771 -0
  30. data/ext/crlibm/double-extended.h +496 -0
  31. data/ext/crlibm/exp-td.c +962 -0
  32. data/ext/crlibm/exp-td.h +685 -0
  33. data/ext/crlibm/exp_accurate.c +197 -0
  34. data/ext/crlibm/exp_accurate.h +85 -0
  35. data/ext/crlibm/gappa/log-de-E0-logir0.gappa +106 -0
  36. data/ext/crlibm/gappa/log-de-E0.gappa +79 -0
  37. data/ext/crlibm/gappa/log-de.gappa +81 -0
  38. data/ext/crlibm/gappa/log-td-E0-logir0.gappa +126 -0
  39. data/ext/crlibm/gappa/log-td-E0.gappa +143 -0
  40. data/ext/crlibm/gappa/log-td-accurate-E0-logir0.gappa +230 -0
  41. data/ext/crlibm/gappa/log-td-accurate-E0.gappa +213 -0
  42. data/ext/crlibm/gappa/log-td-accurate.gappa +217 -0
  43. data/ext/crlibm/gappa/log-td.gappa +156 -0
  44. data/ext/crlibm/gappa/trigoSinCosCase3.gappa +204 -0
  45. data/ext/crlibm/gappa/trigoTanCase2.gappa +73 -0
  46. data/ext/crlibm/install-sh +269 -0
  47. data/ext/crlibm/log-de.c +431 -0
  48. data/ext/crlibm/log-de.h +732 -0
  49. data/ext/crlibm/log-td.c +852 -0
  50. data/ext/crlibm/log-td.h +819 -0
  51. data/ext/crlibm/log10-td.c +906 -0
  52. data/ext/crlibm/log10-td.h +823 -0
  53. data/ext/crlibm/log2-td.c +935 -0
  54. data/ext/crlibm/log2-td.h +821 -0
  55. data/ext/crlibm/maple/atan.mpl +359 -0
  56. data/ext/crlibm/maple/common-procedures.mpl +997 -0
  57. data/ext/crlibm/maple/csh.mpl +446 -0
  58. data/ext/crlibm/maple/double-extended.mpl +151 -0
  59. data/ext/crlibm/maple/exp-td.mpl +195 -0
  60. data/ext/crlibm/maple/log-de.mpl +243 -0
  61. data/ext/crlibm/maple/log-td.mpl +316 -0
  62. data/ext/crlibm/maple/log10-td.mpl +345 -0
  63. data/ext/crlibm/maple/log2-td.mpl +334 -0
  64. data/ext/crlibm/maple/trigo.mpl +728 -0
  65. data/ext/crlibm/maple/triple-double.mpl +58 -0
  66. data/ext/crlibm/missing +198 -0
  67. data/ext/crlibm/mkinstalldirs +40 -0
  68. data/ext/crlibm/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm/scs_lib/COPYING +504 -0
  72. data/ext/crlibm/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm/scs_lib/INSTALL +215 -0
  74. data/ext/crlibm/scs_lib/Makefile.am +18 -0
  75. data/ext/crlibm/scs_lib/Makefile.in +328 -0
  76. data/ext/crlibm/scs_lib/NEWS +0 -0
  77. data/ext/crlibm/scs_lib/README +9 -0
  78. data/ext/crlibm/scs_lib/TODO +4 -0
  79. data/ext/crlibm/scs_lib/addition_scs.c +623 -0
  80. data/ext/crlibm/scs_lib/config.guess +1461 -0
  81. data/ext/crlibm/scs_lib/config.sub +1566 -0
  82. data/ext/crlibm/scs_lib/configure +6226 -0
  83. data/ext/crlibm/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm/scs_lib/install-sh +269 -0
  86. data/ext/crlibm/scs_lib/missing +198 -0
  87. data/ext/crlibm/scs_lib/mkinstalldirs +40 -0
  88. data/ext/crlibm/scs_lib/multiplication_scs.c +456 -0
  89. data/ext/crlibm/scs_lib/poly_fct.c +112 -0
  90. data/ext/crlibm/scs_lib/print_scs.c +73 -0
  91. data/ext/crlibm/scs_lib/rand_scs.c +63 -0
  92. data/ext/crlibm/scs_lib/scs.h +353 -0
  93. data/ext/crlibm/scs_lib/scs2double.c +391 -0
  94. data/ext/crlibm/scs_lib/scs2mpf.c +58 -0
  95. data/ext/crlibm/scs_lib/scs2mpfr.c +61 -0
  96. data/ext/crlibm/scs_lib/scs_private.c +23 -0
  97. data/ext/crlibm/scs_lib/scs_private.h +133 -0
  98. data/ext/crlibm/scs_lib/tests/tbx_timing.h +102 -0
  99. data/ext/crlibm/scs_lib/wrapper_scs.h +486 -0
  100. data/ext/crlibm/scs_lib/zero_scs.c +52 -0
  101. data/ext/crlibm/stamp-h.in +1 -0
  102. data/ext/crlibm/tests/Makefile.am +43 -0
  103. data/ext/crlibm/tests/Makefile.in +396 -0
  104. data/ext/crlibm/tests/blind_test.c +148 -0
  105. data/ext/crlibm/tests/generate_test_vectors.c +258 -0
  106. data/ext/crlibm/tests/soak_test.c +334 -0
  107. data/ext/crlibm/tests/test_common.c +627 -0
  108. data/ext/crlibm/tests/test_common.h +28 -0
  109. data/ext/crlibm/tests/test_perf.c +570 -0
  110. data/ext/crlibm/tests/test_val.c +249 -0
  111. data/ext/crlibm/trigo_accurate.c +500 -0
  112. data/ext/crlibm/trigo_accurate.h +331 -0
  113. data/ext/crlibm/trigo_fast.c +1219 -0
  114. data/ext/crlibm/trigo_fast.h +639 -0
  115. data/ext/crlibm/triple-double.h +878 -0
  116. data/ext/extconf.rb +31 -0
  117. data/ext/fpu.c +107 -0
  118. data/ext/jamis-mod.rb +591 -0
  119. data/lib/fpu.rb +287 -0
  120. data/lib/interval.rb +1170 -0
  121. data/lib/intervals.rb +212 -0
  122. data/lib/struct_float.rb +133 -0
  123. data/test/data_atan.txt +360 -0
  124. data/test/data_cos.txt +346 -0
  125. data/test/data_cosh.txt +3322 -0
  126. data/test/data_exp.txt +3322 -0
  127. data/test/data_log.txt +141 -0
  128. data/test/data_sin.txt +140 -0
  129. data/test/data_sinh.txt +3322 -0
  130. data/test/data_tan.txt +342 -0
  131. metadata +186 -0
@@ -0,0 +1,658 @@
1
+ /*
2
+ * crlibm_private.h
3
+ *
4
+ * This file contains useful tools and data for the crlibm functions.
5
+ *
6
+ */
7
+
8
+ #ifndef CRLIBM_PRIVATE_H
9
+ #define CRLIBM_PRIVATE_H 1
10
+
11
+ #include "scs_lib/scs.h"
12
+ #include "scs_lib/scs_private.h"
13
+
14
+ #ifdef HAVE_CONFIG_H
15
+ #include "crlibm_config.h"
16
+ #endif
17
+ /* otherwise CMake is used, and defines all the useful variables using -D switch */
18
+
19
+ #ifdef HAVE_INTTYPES_H
20
+ #include <inttypes.h>
21
+ #endif
22
+
23
+
24
+
25
+ #if (defined(CRLIBM_TYPECPU_X86) || defined(CRLIBM_TYPECPU_AMD64))
26
+ # ifndef CRLIBM_TYPEOS_BSD
27
+ # include <fpu_control.h>
28
+ # ifndef _FPU_SETCW
29
+ # define _FPU_SETCW(cw) __asm__ ("fldcw %0" : : "m" (*&cw))
30
+ # endif
31
+ # ifndef _FPU_GETCW
32
+ # define _FPU_GETCW(cw) __asm__ ("fnstcw %0" : "=m" (*&cw))
33
+ # endif
34
+ # endif
35
+ #endif
36
+
37
+ /* 64 bit arithmetic may be standardised, but people still do what they want */
38
+ #ifdef HAVE_INTTYPES_H
39
+ #define ULL(bits) 0x##bits##uLL
40
+ #elif defined(_WIN32)
41
+ /* Windows garbage there */
42
+ typedef long long int int64_t;
43
+ typedef unsigned long long int uint64_t;
44
+ #define ULL(bits) 0x##bits##i64
45
+ /* Default, hoping it works, hopefully less and less relevant */
46
+ #else
47
+ typedef long long int int64_t;
48
+ typedef unsigned long long int uint64_t;
49
+ #define ULL(bits) 0x##bits##uLL
50
+ #endif
51
+
52
+ #ifndef SCS_DEF_INT64
53
+ #define SCS_DEF_INT64
54
+ #ifdef CRLIBM_TYPEOS_HPUX
55
+ #ifndef __LP64__ /* To solve the problem with 64 bits integer on HPPA */
56
+ typedef long long int64_t;
57
+ typedef unsigned long long uint64_t;
58
+ #define ULL(bits) 0x##bits##uLL
59
+ #endif
60
+ #endif
61
+ #endif
62
+
63
+
64
+
65
+
66
+ /* The Add22 and Add22Cond functions, as well as double-double
67
+ multiplications of the Dekker family may be either defined as
68
+ functions, or as #defines. Which one is better depends on the
69
+ processor/compiler/OS. As #define has to be used with more care (not
70
+ type-safe), the two following variables should be set to 1 in the
71
+ development/debugging phase, until no type warning remains.
72
+
73
+ */
74
+
75
+ #define ADD22_AS_FUNCTIONS 0
76
+ #define DEKKER_AS_FUNCTIONS 0
77
+
78
+
79
+
80
+ /* setting the following variable adds variables and code for
81
+ monitoring the performance.
82
+ Note that sometimes only round to nearest is instrumented */
83
+ #define EVAL_PERF 1
84
+
85
+
86
+ #if EVAL_PERF==1
87
+ /* counter of calls to the second step (accurate step) */
88
+ extern int crlibm_second_step_taken;
89
+ #endif
90
+
91
+
92
+
93
+ /* The prototypes of the second steps */
94
+ extern void exp_SC(scs_ptr res_scs, double x);
95
+ extern double scs_exp_rn(double);
96
+ extern double scs_exp_ru(double);
97
+ extern double scs_exp_rd(double);
98
+
99
+ extern void scs_log(scs_ptr,db_number, int);
100
+
101
+ extern double scs_atan_rn(double);
102
+ extern double scs_atan_rd(double);
103
+ extern double scs_atan_ru(double);
104
+
105
+ extern double scs_sin_rn(double);
106
+ extern double scs_sin_ru(double);
107
+ extern double scs_sin_rd(double);
108
+ extern double scs_sin_rz(double);
109
+ extern double scs_cos_rn(double);
110
+ extern double scs_cos_ru(double);
111
+ extern double scs_cos_rd(double);
112
+ extern double scs_cos_rz(double);
113
+ extern double scs_tan_rn(double);
114
+ extern double scs_tan_rd(double);
115
+ extern double scs_tan_ru(double);
116
+ extern double scs_tan_rz(double);
117
+
118
+ extern int rem_pio2_scs(scs_ptr, scs_ptr);
119
+
120
/*
 * DOUBLE2INT(_i, _d): store in _i the integer obtained by rounding the
 * double _d.
 *
 * The constant added is 2^52 + 2^51 (6755399441055744.0). After the
 * addition the integer value sits in the low-order bits of the sum, and
 * the word i[LO] of the db_number is copied into _i. The original note
 * states that the conversion is done in rounding to nearest.
 *
 * _d is parenthesized so that any expression (for instance a conditional
 * expression) can be passed safely.
 * NOTE(review): only values that fit in the i[LO] word are meaningful;
 * confirm the accepted range against callers.
 */
#define DOUBLE2INT(_i, _d)                    \
{db_number _t;                                \
 _t.d = ((_d) + 6755399441055744.0);          \
 (_i) = _t.i[LO];}
128
+
129
+
130
/*
 * DOUBLE2LONGINT(_i, _d): same idea as DOUBLE2INT, but the result is taken
 * from the 64-bit view of the sum.
 * Beware: works only for |_i| < 2^51 - 1.
 *
 * After adding 2^52 + 2^51, the low 51 bits of the sum hold the rounded
 * value modulo 2^51, and bit 51 is set exactly when the rounded value is
 * non-negative. The sign is therefore read from that bit rather than from
 * the sign of _d: an input in [-0.5, 0) rounds to 0 and must give 0, not
 * a sign-extended pattern. _d is evaluated once.
 */
#define DOUBLE2LONGINT(_i, _d)                                             \
{                                                                          \
  db_number _t;                                                            \
  _t.d = ((_d) + 6755399441055744.0);                                      \
  if (_t.l & ULL(0008000000000000))      /* rounded value is >= 0 */       \
    (_i) = _t.l & ULL(0007FFFFFFFFFFFF);                                   \
  else                                   /* negative: sign extend */       \
    (_i) = (_t.l & ULL(0007FFFFFFFFFFFF)) | (ULL(FFF8000000000000));       \
}
140
+
141
+
142
+
143
+
144
+
145
+ /* Macros for the rounding tests in directed modes */
146
+ /* After Evgeny Gvozdev pointed out a bug in the rounding procedures I
147
+ decided to centralize them here
148
+
149
+ Note that these tests launch the accurate phase when yl=0, in
150
+ particular in the exceptional cases when the image of a double is a
151
+ double. See the chapter about the log for an example
152
+
153
+ */
154
+
155
+
156
/*
 * TEST_AND_RETURN_RU(yh, yl, eps): rounding test for round-up mode.
 *
 * yh + yl is a double-double approximation and eps an error bound. The
 * macro builds a power of two from the exponent field of |yh| (exponent
 * plus one) and compares |yl| with eps times that value. When |yl| is
 * larger, the rounding can be decided and the macro RETURNS from the
 * enclosing function:
 *   - yl >= 0: the double just above yh (towards +infinity),
 *   - yl <  0: yh itself.
 * Otherwise nothing happens and execution continues after the macro
 * (yl == 0 always takes this path).
 *
 * Each argument is evaluated once and eps is parenthesized; the locals
 * carry a prefix so that caller variables named yh/yl are not captured.
 * Beware: the successor computation fails when yh is zero.
 */
#define TEST_AND_RETURN_RU(__yh__, __yl__, __eps__)                        \
{                                                                          \
  const double _rt_hi = (__yh__);                                          \
  db_number _rt_yh, _rt_yl, _rt_u53;                                       \
  int _rt_yh_neg, _rt_yl_neg;                                              \
  _rt_yh.d = _rt_hi;   _rt_yl.d = (__yl__);                                \
  _rt_yh_neg = ((_rt_yh.i[HI] & 0x80000000) != 0);                         \
  _rt_yl_neg = ((_rt_yl.i[HI] & 0x80000000) != 0);                         \
  _rt_yh.l = _rt_yh.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_yl.l = _rt_yl.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_u53.l = (_rt_yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);  \
  if (_rt_yl.d > (__eps__) * _rt_u53.d) {                                  \
    if (!_rt_yl_neg) {  /* yl==0 is filtered by the above test */          \
      /* return next up */                                                 \
      _rt_yh.d = _rt_hi;                                                   \
      if (_rt_yh_neg) _rt_yh.l--; else _rt_yh.l++;  /* fails for zero */   \
      return _rt_yh.d;                                                     \
    }                                                                      \
    else return _rt_hi;                                                    \
  }                                                                        \
}
175
+
176
+
177
/*
 * TEST_AND_RETURN_RD(yh, yl, eps): rounding test for round-down mode.
 *
 * yh + yl is a double-double approximation and eps an error bound. The
 * macro builds a power of two from the exponent field of |yh| (exponent
 * plus one) and compares |yl| with eps times that value. When |yl| is
 * larger, the macro RETURNS from the enclosing function:
 *   - yl <  0: the double just below yh (towards -infinity),
 *   - yl >= 0: yh itself.
 * Otherwise nothing happens and execution continues after the macro
 * (yl == 0 always takes this path).
 *
 * Each argument is evaluated once and eps is parenthesized; the locals
 * carry a prefix so that caller variables named yh/yl are not captured.
 * Beware: the predecessor computation fails when yh is zero.
 */
#define TEST_AND_RETURN_RD(__yh__, __yl__, __eps__)                        \
{                                                                          \
  const double _rt_hi = (__yh__);                                          \
  db_number _rt_yh, _rt_yl, _rt_u53;                                       \
  int _rt_yh_neg, _rt_yl_neg;                                              \
  _rt_yh.d = _rt_hi;   _rt_yl.d = (__yl__);                                \
  _rt_yh_neg = ((_rt_yh.i[HI] & 0x80000000) != 0);                         \
  _rt_yl_neg = ((_rt_yl.i[HI] & 0x80000000) != 0);                         \
  _rt_yh.l = _rt_yh.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_yl.l = _rt_yl.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_u53.l = (_rt_yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);  \
  if (_rt_yl.d > (__eps__) * _rt_u53.d) {                                  \
    if (_rt_yl_neg) {   /* yl==0 is filtered by the above test */          \
      /* return next down */                                               \
      _rt_yh.d = _rt_hi;                                                   \
      if (_rt_yh_neg) _rt_yh.l++; else _rt_yh.l--;  /* fails for zero */   \
      return _rt_yh.d;                                                     \
    }                                                                      \
    else return _rt_hi;                                                    \
  }                                                                        \
}
196
+
197
+
198
+
199
/*
 * TEST_AND_RETURN_RZ(yh, yl, eps): rounding test for round-toward-zero.
 *
 * yh + yl is a double-double approximation and eps an error bound. The
 * macro builds a power of two from the exponent field of |yh| (exponent
 * plus one) and compares |yl| with eps times that value. When |yl| is
 * larger, the macro RETURNS from the enclosing function:
 *   - yl and yh of opposite signs: the double next to yh with the smaller
 *     magnitude (the 64-bit pattern of yh is decremented),
 *   - same signs: yh itself.
 * Otherwise nothing happens and execution continues after the macro.
 *
 * Each argument is evaluated once and eps is parenthesized; the locals
 * carry a prefix so that caller variables named yh/yl are not captured.
 * Beware: the decrement fails when yh is zero.
 */
#define TEST_AND_RETURN_RZ(__yh__, __yl__, __eps__)                        \
{                                                                          \
  const double _rt_hi = (__yh__);                                          \
  db_number _rt_yh, _rt_yl, _rt_u53;                                       \
  int _rt_yh_neg, _rt_yl_neg;                                              \
  _rt_yh.d = _rt_hi;   _rt_yl.d = (__yl__);                                \
  _rt_yh_neg = ((_rt_yh.i[HI] & 0x80000000) != 0);                         \
  _rt_yl_neg = ((_rt_yl.i[HI] & 0x80000000) != 0);                         \
  _rt_yh.l = _rt_yh.l & ULL(7fffffffffffffff);  /* absolute value */       \
  _rt_yl.l = _rt_yl.l & ULL(7fffffffffffffff);  /* absolute value */       \
  _rt_u53.l = (_rt_yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);  \
  if (_rt_yl.d > (__eps__) * _rt_u53.d) {                                  \
    if (_rt_yl_neg != _rt_yh_neg) {                                        \
      _rt_yh.d = _rt_hi;                                                   \
      _rt_yh.l--;                       /* Beware: fails for zero */       \
      return _rt_yh.d;                                                     \
    }                                                                      \
    else return _rt_hi;                                                    \
  }                                                                        \
}
217
+
218
+
219
+
220
/*
 * TEST_AND_COPY_RU(cond, res, yh, yl, eps): non-returning variant of the
 * round-up rounding test.
 *
 * cond is first set to 0. A power of two is built from the exponent field
 * of |yh| (exponent plus one); when |yl| exceeds eps times that value,
 * cond is set to 1 and res receives
 *   - the double just above yh (towards +infinity) if yl >= 0,
 *   - yh itself if yl < 0.
 * When the test fails res is left untouched (yl == 0 always fails).
 *
 * yh, yl are evaluated once and eps is parenthesized; the locals carry a
 * prefix so that caller variables named yh/yl are not captured.
 * Beware: the successor computation fails when yh is zero.
 */
#define TEST_AND_COPY_RU(__cond__, __res__, __yh__, __yl__, __eps__)       \
{                                                                          \
  const double _rt_hi = (__yh__);                                          \
  db_number _rt_yh, _rt_yl, _rt_u53;                                       \
  int _rt_yh_neg, _rt_yl_neg;                                              \
  _rt_yh.d = _rt_hi;   _rt_yl.d = (__yl__);                                \
  _rt_yh_neg = ((_rt_yh.i[HI] & 0x80000000) != 0);                         \
  _rt_yl_neg = ((_rt_yl.i[HI] & 0x80000000) != 0);                         \
  _rt_yh.l = _rt_yh.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_yl.l = _rt_yl.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_u53.l = (_rt_yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);  \
  (__cond__) = 0;                                                          \
  if (_rt_yl.d > (__eps__) * _rt_u53.d) {                                  \
    (__cond__) = 1;                                                        \
    if (!_rt_yl_neg) {  /* yl==0 is filtered by the above test */          \
      /* next up */                                                        \
      _rt_yh.d = _rt_hi;                                                   \
      if (_rt_yh_neg) _rt_yh.l--; else _rt_yh.l++;  /* fails for zero */   \
      (__res__) = _rt_yh.d;                                                \
    }                                                                      \
    else {                                                                 \
      (__res__) = _rt_hi;                                                  \
    }                                                                      \
  }                                                                        \
}
243
+
244
/*
 * TEST_AND_COPY_RD(cond, res, yh, yl, eps): non-returning variant of the
 * round-down rounding test.
 *
 * cond is first set to 0. A power of two is built from the exponent field
 * of |yh| (exponent plus one); when |yl| exceeds eps times that value,
 * cond is set to 1 and res receives
 *   - the double just below yh (towards -infinity) if yl < 0,
 *   - yh itself if yl >= 0.
 * When the test fails res is left untouched (yl == 0 always fails).
 *
 * yh, yl are evaluated once and eps is parenthesized; the locals carry a
 * prefix so that caller variables named yh/yl are not captured.
 * Beware: the predecessor computation fails when yh is zero.
 */
#define TEST_AND_COPY_RD(__cond__, __res__, __yh__, __yl__, __eps__)       \
{                                                                          \
  const double _rt_hi = (__yh__);                                          \
  db_number _rt_yh, _rt_yl, _rt_u53;                                       \
  int _rt_yh_neg, _rt_yl_neg;                                              \
  _rt_yh.d = _rt_hi;   _rt_yl.d = (__yl__);                                \
  _rt_yh_neg = ((_rt_yh.i[HI] & 0x80000000) != 0);                         \
  _rt_yl_neg = ((_rt_yl.i[HI] & 0x80000000) != 0);                         \
  _rt_yh.l = _rt_yh.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_yl.l = _rt_yl.l & 0x7fffffffffffffffLL;  /* absolute value */        \
  _rt_u53.l = (_rt_yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);  \
  (__cond__) = 0;                                                          \
  if (_rt_yl.d > (__eps__) * _rt_u53.d) {                                  \
    (__cond__) = 1;                                                        \
    if (_rt_yl_neg) {   /* yl==0 is filtered by the above test */          \
      /* next down */                                                      \
      _rt_yh.d = _rt_hi;                                                   \
      if (_rt_yh_neg) _rt_yh.l++; else _rt_yh.l--;  /* fails for zero */   \
      (__res__) = _rt_yh.d;                                                \
    }                                                                      \
    else {                                                                 \
      (__res__) = _rt_hi;                                                  \
    }                                                                      \
  }                                                                        \
}
267
+
268
+
269
/*
 * TEST_AND_COPY_RZ(cond, res, yh, yl, eps): non-returning variant of the
 * round-toward-zero rounding test.
 *
 * cond is first set to 0. A power of two is built from the exponent field
 * of |yh| (exponent plus one); when |yl| exceeds eps times that value,
 * cond is set to 1 and res receives
 *   - the double next to yh with the smaller magnitude if yl and yh have
 *     opposite signs (the 64-bit pattern of yh is decremented),
 *   - yh itself otherwise.
 * When the test fails res is left untouched.
 *
 * The macro body is now a balanced block: the outer opening brace
 * previously had no matching closing brace. yh, yl are evaluated once,
 * eps is parenthesized, and the locals carry a prefix so that caller
 * variables named yh/yl are not captured.
 * Beware: the decrement fails when yh is zero.
 */
#define TEST_AND_COPY_RZ(__cond__, __res__, __yh__, __yl__, __eps__)       \
{                                                                          \
  const double _rt_hi = (__yh__);                                          \
  db_number _rt_yh, _rt_yl, _rt_u53;                                       \
  int _rt_yh_neg, _rt_yl_neg;                                              \
  _rt_yh.d = _rt_hi;   _rt_yl.d = (__yl__);                                \
  _rt_yh_neg = ((_rt_yh.i[HI] & 0x80000000) != 0);                         \
  _rt_yl_neg = ((_rt_yl.i[HI] & 0x80000000) != 0);                         \
  _rt_yh.l = _rt_yh.l & ULL(7fffffffffffffff);  /* absolute value */       \
  _rt_yl.l = _rt_yl.l & ULL(7fffffffffffffff);  /* absolute value */       \
  _rt_u53.l = (_rt_yh.l & ULL(7ff0000000000000)) + ULL(0010000000000000);  \
  (__cond__) = 0;                                                          \
  if (_rt_yl.d > (__eps__) * _rt_u53.d) {                                  \
    (__cond__) = 1;                                                        \
    if (_rt_yl_neg != _rt_yh_neg) {                                        \
      _rt_yh.d = _rt_hi;                                                   \
      _rt_yh.l--;                       /* Beware: fails for zero */       \
      (__res__) = _rt_yh.d;                                                \
    }                                                                      \
    else {                                                                 \
      (__res__) = _rt_hi;                                                  \
    }                                                                      \
  }                                                                        \
}
291
+
292
+
293
+
294
+ /* If the processor has a FMA, use it ! **/
295
+
296
+ /* All this probably works only with gcc.
297
+ See Markstein book for the case of HP's compiler */
298
+
299
+ #if defined(CRLIBM_TYPECPU_POWERPC) && defined(__GNUC__)
300
+ #define PROCESSOR_HAS_FMA 1
301
+ #undef PROCESSOR_HAS_FMA
302
/*
 * PowerPC fused multiply-add / multiply-subtract through GCC inline
 * assembly (GCC statement expressions).
 *   FMA(a,b,c) yields a*b + c
 *   FMS(a,b,c) yields a*b - c
 * Note: the enclosing #if defines PROCESSOR_HAS_FMA and undefines it on
 * the next line, so the FMA-based Mul12/Mul22 are not selected by it.
 */
#define FMA(a,b,c)                                   \
({                                                   \
  double _a = (a), _b = (b), _c = (c);               \
  double _r;                                         \
  __asm__ ("fmadd %0, %1, %2, %3\n ;;\n"             \
           : "=f"(_r)                                \
           : "f"(_a), "f"(_b), "f"(_c)               \
           );                                        \
  _r;                                                \
})


#define FMS(a,b,c)                                   \
({                                                   \
  double _a = (a), _b = (b), _c = (c);               \
  double _r;                                         \
  __asm__ ("fmsub %0, %1, %2, %3\n ;;\n"             \
           : "=f"(_r)                                \
           : "f"(_a), "f"(_b), "f"(_c)               \
           );                                        \
  _r;                                                \
})
324
+
325
+ #endif /* defined(CRLIBM_TYPECPU_POWERPC) && defined(__GCC__) */
326
+
327
+
328
+
329
+
330
+ /* On the Itanium 1 / gcc3.2 we lose 10 cycles when using the FMA !?!
331
+ It probably breaks the scheduling algorithms somehow...
332
+ To test again with higher gcc versions
333
+ */
334
+
335
+ #if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GNUC__) && !defined(__INTEL_COMPILER) && 0
336
+ #define PROCESSOR_HAS_FMA 1
337
/*
 * Itanium fused multiply-add / multiply-subtract through GCC inline
 * assembly (GCC statement expressions).
 *   FMA(a,b,c) yields a*b + c
 *   FMS(a,b,c) yields a*b - c
 * Note: the enclosing #if ends with "&& 0", so this variant is currently
 * compiled out.
 */
#define FMA(a,b,c)                                   \
({                                                   \
  double _a = (a), _b = (b), _c = (c);               \
  double _r;                                         \
  __asm__ ("fma %0 = %1, %2, %3\n ;;\n"              \
           : "=f"(_r)                                \
           : "f"(_a), "f"(_b), "f"(_c)               \
           );                                        \
  _r;                                                \
})


#define FMS(a,b,c)                                   \
({                                                   \
  double _a = (a), _b = (b), _c = (c);               \
  double _r;                                         \
  __asm__ ("fms %0 = %1, %2, %3\n ;;\n"              \
           : "=f"(_r)                                \
           : "f"(_a), "f"(_b), "f"(_c)               \
           );                                        \
  _r;                                                \
})
359
+ #endif /* defined(CRLIBM_TYPECPU_ITANIUM) && defined(__GCC__) && !defined(__INTEL_COMPILER) */
360
+
361
+
362
+
363
+
364
+ #if defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)
365
+ #define PROCESSOR_HAS_FMA 1
366
+ #if 0 /* Commented out because it shouldn't be there: There should be
367
+ a standard #include doing all this, but as of april 2005
368
+ it doesn't exist, say intel people). Leave
369
+ it as documentation, though, until it is replaced by #include
370
+ */
371
+ /* Table 1-17: legal floating-point precision completers (.pc) */
372
+ typedef enum {
373
+ _PC_S = 1 /* single .s */
374
+ ,_PC_D = 2 /* double .d */
375
+ ,_PC_NONE = 3 /* dynamic */
376
+ } _Asm_pc;
377
+
378
+ /* Table 1-22: legal getf/setf floating-point register access completers */
379
+ typedef enum {
380
+ _FR_S = 1 /* single form .s */
381
+ ,_FR_D = 2 /* double form .d */
382
+ ,_FR_EXP = 3 /* exponent form .exp */
383
+ ,_FR_SIG = 4 /* significand form .sig */
384
+ } _Asm_fr_access;
385
+
386
+ /* Table 1-24: legal floating-point FPSR status field completers (.sf) */
387
+ typedef enum {
388
+ _SF0 = 0 /* FPSR status field 0 .s0 */
389
+ ,_SF1 = 1 /* FPSR status field 1 .s1 */
390
+ ,_SF2 = 2 /* FPSR status field 2 .s2 */
391
+ ,_SF3 = 3 /* FPSR status field 3 .s3 */
392
+ } _Asm_sf;
393
+ #endif
394
+
395
/*
 * Itanium / Intel compiler fused operations, built on the _Asm_fma and
 * _Asm_fms intrinsics.
 *   FMA(a,b,c) yields a*b + c
 *   FMS(a,b,c) yields a*b - c
 * The literal 2 stands for _PC_D (double precision completer) and 0 for
 * _SF0 (FPSR status field 0), as listed in the enums kept under "#if 0"
 * above.
 * The macros expand to a plain expression with no trailing semicolon, so
 * they can be used inside larger expressions; arguments are parenthesized.
 */
#define FMA(a,b,c)  _Asm_fma( 2/*_PC_D*/, (a), (b), (c), 0/*_SF0*/ )


#define FMS(a,b,c)  _Asm_fms( 2/*_PC_D*/, (a), (b), (c), 0/*_SF0*/ )
401
+
402
+ #endif /*defined(CRLIBM_TYPECPU_ITANIUM) && defined(__INTEL_COMPILER)*/
403
+
404
+
405
+
406
+
407
+
408
+
409
+
410
+
411
+ #ifdef WORDS_BIGENDIAN
412
+ #define DB_ONE {{0x3ff00000, 0x00000000}}
413
+ #else
414
+ #define DB_ONE {{0x00000000 ,0x3ff00000}}
415
+ #endif
416
+
417
+
418
+
419
+
420
+
421
+
422
+ extern const scs scs_zer, scs_half, scs_one, scs_two, scs_sixinv;
423
+
424
+
425
+ #define SCS_ZERO (scs_ptr)(&scs_zer)
426
+ #define SCS_HALF (scs_ptr)(&scs_half)
427
+ #define SCS_ONE (scs_ptr)(&scs_one)
428
+ #define SCS_TWO (scs_ptr)(&scs_two)
429
+ #define SCS_SIXINV (scs_ptr)(&scs_sixinv)
430
+
431
+
432
+
433
+
434
+
435
/* Absolute value of x: x when x > 0, otherwise -x (a zero argument takes
   the negated branch). The argument is evaluated twice, so do not pass
   expressions with side effects. */
#define ABS(x) ((x) > 0 ? (x) : -(x))
436
+
437
+
438
+
439
+
440
+
441
+ /*
442
+ * In the following, when an operator is preceded by a '@' it means that we
443
+ * are considering the IEEE-compliant machine operator, otherwise it
444
+ * is the mathematical operator.
445
+ *
446
+ */
447
+
448
+
449
/*
 * Add12Cond(s, r, a, b): computes s and r such that s + r = a + b, with
 * s = a @+ b exactly. No ordering of a and b is required: the operand
 * with the larger magnitude is selected at run time.
 *
 * s and r are lvalues of type double. a and b are evaluated exactly once:
 * the magnitude comparison uses the captured copies, not the raw macro
 * arguments.
 */
#define Add12Cond(s, r, a, b)        \
{double _z, _a=(a), _b=(b);          \
 (s) = _a + _b;                      \
 if (ABS(_a) > ABS(_b)){             \
   _z = (s) - _a;                    \
   (r) = _b - _z;                    \
 }else {                             \
   _z = (s) - _b;                    \
   (r) = _a - _z;}}
461
+
462
/*
 * Add12(s, r, a, b): computes s and r such that s + r = a + b, with
 * s = a @+ b exactly, under the condition a >= b.
 * NOTE(review): for this branch-free sequence the condition is normally
 * meant on magnitudes (|a| >= |b|) -- confirm against callers.
 *
 * s and r are lvalues of type double; a and b are evaluated once.
 */
#define Add12(s, r, a, b)            \
{                                    \
  double _a = (a), _b = (b);         \
  double _z;                         \
  (s) = _a + _b;                     \
  _z  = (s) - _a;                    \
  (r) = _b - _z;                     \
}
471
+
472
+
473
/*
 * Fast3Sum(r1, r2, r3, a, b, c): computes r1, r2, r3 such that
 * r1 + r2 + r3 = a + b + c exactly, by chaining three Fast2Sum steps:
 * (b, c) first, then a with the high part, then the two low parts.
 *
 * Fast2Sum is not defined in this header and must be provided elsewhere.
 * The temporaries carry a prefix so that caller arguments named u, v or w
 * are not captured by the macro's own locals.
 */
#define Fast3Sum(r1, r2, r3, a, b, c)          \
{double _f3s_u, _f3s_v, _f3s_w;                \
 Fast2Sum(_f3s_u, _f3s_v, b, c);               \
 Fast2Sum(r1, _f3s_w, a, _f3s_u);              \
 Fast2Sum(r2, r3, _f3s_w, _f3s_v); }
481
+
482
+
483
+
484
+
485
+
486
+
487
+
488
+ /*
489
+ * Functions to compute double-double addition: zh+zl = xh+xl + yh+yl
490
+ * knowing that xh>yh
491
+ * relative error is smaller than 2^-103
492
+ */
493
+
494
+
495
+ #if ADD22_AS_FUNCTIONS
496
+ extern void Add22(double *zh, double *zl, double xh, double xl, double yh, double yl);
497
+ extern void Add22Cond(double *zh, double *zl, double xh, double xl, double yh, double yl);
498
+
499
+ #else /* ADD22_AS_FUNCTIONS */
500
+
501
/*
 * Add22Cond(zh, zl, xh, xl, yh, yl): double-double addition
 * (*zh + *zl) = (xh + xl) + (yh + yl) with no ordering required between
 * xh and yh; the operand with the larger high part (in magnitude) is
 * selected at run time.
 *
 * zh and zl are pointers to double (same calling convention as the
 * function version declared when ADD22_AS_FUNCTIONS is set). They are
 * parenthesized before being dereferenced, so pointer expressions such
 * as p + 1 are accepted.
 * xh and yh are evaluated more than once: do not pass expressions with
 * side effects.
 */
#define Add22Cond(zh,zl,xh,xl,yh,yl)                                                       \
do {                                                                                       \
  double _r,_s;                                                                            \
  _r = (xh)+(yh);                                                                          \
  _s = ((ABS(xh)) > (ABS(yh)))? ((xh)-_r+(yh)+(yl)+(xl)) : ((yh)-_r+(xh)+(xl)+(yl));       \
  *(zh) = _r+_s;                                                                           \
  *(zl) = (_r - (*(zh))) + _s;                                                             \
} while(0)
509
+
510
+
511
+
512
/*
 * Add22(zh, zl, xh, xl, yh, yl): double-double addition
 * (*zh + *zl) = (xh + xl) + (yh + yl), for the case where xh dominates yh
 * (see the note above: "knowing that xh>yh").
 *
 * zh and zl are pointers to double (same calling convention as the
 * function version declared when ADD22_AS_FUNCTIONS is set). They are
 * parenthesized before being dereferenced, so pointer expressions such
 * as p + 1 are accepted.
 * xh is evaluated twice: do not pass expressions with side effects.
 */
#define Add22(zh,zl,xh,xl,yh,yl)               \
do {                                           \
  double _r,_s;                                \
  _r = (xh)+(yh);                              \
  _s = ((((xh)-_r) +(yh)) + (yl)) + (xl);      \
  *(zh) = _r+_s;                               \
  *(zl) = (_r - (*(zh))) + _s;                 \
} while(0)
520
+
521
+ #endif /* ADD22_AS_FUNCTIONS */
522
+
523
+
524
+
525
+ #ifdef PROCESSOR_HAS_FMA
526
/* One of the nice things with the fused multiply-and-add is that it
   greatly simplifies the double-double multiplications.

   Mul12(rh, rl, u, v): *rh receives the rounded product u*v and *rl the
   value FMS(u, v, *rh), i.e. u*v - *rh computed by the fused operation.
   rh and rl are pointers to double. All arguments are parenthesized, so
   that Mul12(rh, rl, a+b, c) multiplies (a+b) by c. u and v are evaluated
   twice: do not pass expressions with side effects. */
#define Mul12(rh,rl,u,v)                      \
{                                             \
  *(rh) = (u)*(v);                            \
  *(rl) = FMS((u), (v), *(rh));               \
}
533
+
534
/*
 * Mul22(pzh, pzl, xh, xl, yh, yl), FMA variant: double-double product
 * (*pzh + *pzl) ~ (xh + xl) * (yh + yl).
 *
 * ph is the rounded product xh*yh; pl accumulates, through fused
 * operations, xh*yh - ph, then xh*yl, then xl*yh (the xl*yl term is
 * not computed). The pair (ph, pl) is finally renormalized into
 * (*pzh, *pzl).
 *
 * pzh and pzl are pointers to double. All arguments are parenthesized.
 * xh and yh are evaluated several times, and the locals ph/pl shadow
 * caller variables of the same name: pass plain, differently named
 * operands.
 */
#define Mul22(pzh,pzl, xh,xl, yh,yl)          \
{                                             \
  double ph, pl;                              \
  ph = (xh)*(yh);                             \
  pl = FMS((xh), (yh), ph);                   \
  pl = FMA((xh), (yl), pl);                   \
  pl = FMA((xl), (yh), pl);                   \
  *(pzh) = ph+pl;                             \
  *(pzl) = ph - (*(pzh));                     \
  *(pzl) += pl;                               \
}
545
+
546
+
547
+ /* besides we don't care anymore about overflows in the mult */
548
+ #define Mul12Cond Mul12
549
+ #define Mul22cond Mul22
550
+
551
+
552
+ #else /* ! PROCESSOR_HAS_FMA */
553
+
554
+
555
+ #if DEKKER_AS_FUNCTIONS
556
+ extern void Mul12(double *rh, double *rl, double u, double v);
557
+ extern void Mul12Cond(double *rh, double *rl, double a, double b);
558
+ extern void Mul22(double *zh, double *zl, double xh, double xl, double yh, double yl);
559
+ #else /* if DEKKER_AS_FUNCTIONS */
560
/*
 * Mul12(rh, rl, u, v): computes rh and rl such that rh + rl = u * v with
 * rh = u @* v exactly, under the conditions u < 2^970 and v < 2^970.
 *
 * Each operand is split into a high and a low half with the constant
 * c = 2^27 + 1, and the rounding error of the product is rebuilt from
 * the four partial products.
 *
 * rh and rl are pointers to double and are parenthesized before being
 * dereferenced. u and v are captured BEFORE the other locals are
 * declared, so that a caller variable named c, up, u1, ... passed as an
 * operand is read correctly instead of being shadowed by the macro's own
 * temporaries. (The pointer arguments are still expanded after those
 * declarations: do not pass the address of a variable with such a name.)
 */
#define Mul12(rh,rl,u,v)                                 \
{                                                        \
  double _u = (u), _v = (v);                             \
  const double c = 134217729.; /* 2^27 +1 */             \
  double up, u1, u2, vp, v1, v2;                         \
                                                         \
  up = _u*c;        vp = _v*c;                           \
  u1 = (_u-up)+up;  v1 = (_v-vp)+vp;                     \
  u2 = _u-u1;       v2 = _v-v1;                          \
                                                         \
  *(rh) = _u*_v;                                         \
  *(rl) = (((u1*v1-*(rh))+(u1*v2))+(u2*v1))+(u2*v2);     \
}
577
+
578
+
579
/*
 * Mul12Cond(rh, rl, a, b): computes rh and rl such that rh + rl = a * b
 * and rh = a @* b exactly, without the magnitude restriction of Mul12.
 *
 * An operand whose high word exceeds 0x7C900000 (exponent field of 2^970)
 * is scaled down by 2^-53 before calling Mul12, and the two results are
 * scaled back by 2^53 afterwards, once per scaled operand.
 *
 * rh and rl are pointers to double. a and b are stored in db_number
 * unions through their .d member, which is what makes the high word
 * available; the previous text initialized the union directly from a
 * double and multiplied the union itself, which does not compile.
 * NOTE(review): the high-word comparison is kept as written; it does not
 * mask the sign bit, so how negative operands compare depends on the
 * signedness of db_number.i -- confirm against scs.h.
 */
#define Mul12Cond(rh, rl, a, b)                                                        \
{                                                                                      \
  const double two_em53 = 1.1102230246251565404e-16; /* 0x3CA00000, 0x00000000 */      \
  const double two_e53  = 9007199254740992.;         /* 0x43400000, 0x00000000 */      \
  double u, v;                                                                         \
  db_number _a, _b;                                                                    \
  _a.d = (a);  _b.d = (b);                                                             \
                                                                                       \
  if (_a.i[HI]>0x7C900000) u = _a.d*two_em53;                                          \
  else                     u = _a.d;                                                   \
  if (_b.i[HI]>0x7C900000) v = _b.d*two_em53;                                          \
  else                     v = _b.d;                                                   \
                                                                                       \
  Mul12((rh), (rl), u, v);                                                             \
                                                                                       \
  if (_a.i[HI]>0x7C900000) {*(rh) *= two_e53; *(rl) *= two_e53;}                       \
  if (_b.i[HI]>0x7C900000) {*(rh) *= two_e53; *(rl) *= two_e53;}                       \
}
599
+
600
+
601
+
602
/*
 * Mul22(zh, zl, xh, xl, yh, yl): computes the double-double
 * multiplication zh+zl = (xh+xl) * (yh+yl).
 * Relative error is smaller than 2^-102.
 *
 * The product xh*yh is computed exactly as mh + ml by splitting both
 * operands with c = 2^27 + 1; the cross terms xh*yl and xl*yh are then
 * added to ml (xl*yl is not computed), and (mh, ml) is renormalized into
 * (*zh, *zl).
 *
 * zh and zl are pointers to double and are parenthesized before being
 * dereferenced, so pointer expressions such as p + 1 are accepted.
 * xh and yh are evaluated several times, and the temporaries (mh, ml, c,
 * up, u1, ...) shadow caller variables of the same name: pass plain,
 * differently named operands.
 */
#define Mul22(zh,zl,xh,xl,yh,yl)                         \
{                                                        \
  double mh, ml;                                         \
                                                         \
  const double c = 134217729.;                           \
  double up, u1, u2, vp, v1, v2;                         \
                                                         \
  up = (xh)*c;        vp = (yh)*c;                       \
  u1 = ((xh)-up)+up;  v1 = ((yh)-vp)+vp;                 \
  u2 = (xh)-u1;       v2 = (yh)-v1;                      \
                                                         \
  mh = (xh)*(yh);                                        \
  ml = (((u1*v1-mh)+(u1*v2))+(u2*v1))+(u2*v2);           \
                                                         \
  ml += (xh)*(yl) + (xl)*(yh);                           \
  *(zh) = mh+ml;                                         \
  *(zl) = mh - (*(zh)) + ml;                             \
}
627
+
628
+
629
+
630
+ #endif /* DEKKER_AS_FUNCTIONS */
631
+
632
+ #endif /* PROCESSOR_HAS_FMA */
633
+
634
+
635
+
636
+ /* In the following the one-line computation of _cl was split so that
637
+ icc(8.1) would compile it properly. It's a bug of icc */
638
+
639
+ #if DEKKER_AS_FUNCTIONS
640
+ extern void Div22(double *z, double *zz, double x, double xx, double y, double yy);
641
+ #else
642
/*
 * Div22(pzh, pzl, xh, xl, yh, yl): double-double division
 * (*pzh + *pzl) ~ (xh + xl) / (yh + yl).
 *
 * _ch is the rounded quotient xh/yh; its product with yh is recovered
 * exactly through Mul12, and the correction term _cl is built from the
 * remainder, xl and _ch*yl, then divided by yh. The computation of _cl is
 * deliberately split over several statements (icc 8.1 miscompiled the
 * one-line form). The pair (_ch, _cl) is finally renormalized.
 *
 * pzh and pzl are pointers to double and are parenthesized before being
 * dereferenced. xh and yh are evaluated several times: do not pass
 * expressions with side effects.
 */
#define Div22(pzh,pzl,xh,xl,yh,yl)  {                    \
  double _ch,_cl,_uh,_ul;                                \
  _ch=(xh)/(yh);   Mul12(&_uh,&_ul,_ch,(yh));            \
  _cl=((xh)-_uh);                                        \
  _cl -= _ul;                                            \
  _cl += (xl);                                           \
  _cl -= _ch*(yl);                                       \
  _cl /= (yh);                                           \
  *(pzh)=_ch+_cl;   *(pzl)=(_ch-(*(pzh)))+_cl;           \
}
652
+ #endif /* DEKKER_AS_FUNCTIONS */
653
+
654
+
655
+
656
+
657
+
658
+ #endif /*CRLIBM_PRIVATE_H*/