pspline 5.0.5 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/README.md +44 -43
  4. data/Rakefile +6 -6
  5. data/bin/console +14 -14
  6. data/bin/setup +8 -8
  7. data/ext/pspline/basis.cpp +394 -351
  8. data/ext/pspline/example/exbspline.rb +57 -57
  9. data/ext/pspline/example/excspline.rb +57 -57
  10. data/ext/pspline/example/exdspline.rb +55 -55
  11. data/ext/pspline/example/exfspline.rb +44 -44
  12. data/ext/pspline/example/exfspline1.rb +40 -40
  13. data/ext/pspline/example/exfspline2.rb +68 -68
  14. data/ext/pspline/example/exfspline3.rb +64 -64
  15. data/ext/pspline/example/exmspline.rb +68 -68
  16. data/ext/pspline/example/expspline.rb +29 -29
  17. data/ext/pspline/example/expspline1.rb +29 -29
  18. data/ext/pspline/example/expspline2.rb +47 -47
  19. data/ext/pspline/example/exqspline.rb +31 -31
  20. data/ext/pspline/example/exqspline1.rb +31 -31
  21. data/ext/pspline/example/exqspline2.rb +50 -50
  22. data/ext/pspline/example/exqspline3.rb +51 -51
  23. data/ext/pspline/example/exqspline4.rb +35 -35
  24. data/ext/pspline/example/exrspline.rb +34 -34
  25. data/ext/pspline/example/exrspline1.rb +34 -34
  26. data/ext/pspline/example/exrspline2.rb +44 -44
  27. data/ext/pspline/example/exsspline.rb +35 -35
  28. data/ext/pspline/example/exsspline1.rb +35 -35
  29. data/ext/pspline/example/extspline.rb +54 -54
  30. data/ext/pspline/extconf.rb +7 -7
  31. data/ext/pspline/fft.cpp +27 -552
  32. data/ext/pspline/include/basis/basis.h +145 -137
  33. data/ext/pspline/include/basis/fft.h +188 -152
  34. data/ext/pspline/include/basis/fft_complex.h +215 -0
  35. data/ext/pspline/include/basis/fft_real.h +625 -0
  36. data/ext/pspline/include/basis/gabs.h +35 -0
  37. data/ext/pspline/include/basis/marray_class_ext.h +568 -0
  38. data/ext/pspline/include/basis/marray_ext.h +100 -0
  39. data/ext/pspline/include/basis/matrix_luc_ext.h +300 -0
  40. data/ext/pspline/include/basis/matrix_lud_ext.h +298 -0
  41. data/ext/pspline/include/basis/poly.h +454 -0
  42. data/ext/pspline/include/basis/poly_array.h +1030 -1568
  43. data/ext/pspline/include/basis/pspline.h +806 -642
  44. data/ext/pspline/include/basis/real.h +526 -0
  45. data/ext/pspline/include/basis/real_inline.h +442 -0
  46. data/ext/pspline/include/basis/spline.h +83 -0
  47. data/ext/pspline/include/basis/uspline.h +251 -210
  48. data/ext/pspline/include/basis/util.h +122 -656
  49. data/ext/pspline/include/bspline.h +71 -377
  50. data/ext/pspline/include/bspline_Config.h +8 -2
  51. data/ext/pspline/include/real_config.h +3 -0
  52. data/ext/pspline/pspline.cpp +1236 -1038
  53. data/ext/pspline/real.cpp +1607 -0
  54. data/ext/pspline/real_const.cpp +585 -0
  55. data/lib/pspline.rb +71 -71
  56. data/lib/pspline/version.rb +1 -1
  57. data/pspline.gemspec +25 -25
  58. metadata +17 -5
  59. data/ext/pspline/plotsub.cpp +0 -139
  60. data/ext/pspline/util.cpp +0 -483
@@ -0,0 +1,1607 @@
1
+ #include <stddef.h>
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #include <assert.h>
5
+ #include <string.h>
6
+ #include <math.h>
7
+ #ifdef USE_RANDOM
8
+ #include <random>
9
+ #endif
10
+ #include "real_config.h"
11
+ #include "bspline_Config.h"
12
+ #include "basis/real.h"
13
+
14
+ namespace rios {
15
+ unsigned int fixed = 0x00000001;
16
+ unsigned int scientific = 0x00000002;
17
+ };
18
+
19
+ #define REAL(p) (p[N])
20
+ #define DOUBLE(e) (e(N))
21
+
22
+ const Real real_inv_fact (size_t n, int i) { return Real(n, real_inv_data[i]); }
23
+ const Real real_cos_table(size_t n, int i) { return Real(n, real_cos_data[i]); }
24
+ const Real real_sin_table(size_t n, int i) { return Real(n, real_sin_data[i]); }
25
+
26
+ template <> Real& Real::operator = (const Real& p)
27
+ {
28
+ if (this != &p) {
29
+ if (_f || w == NULL) {
30
+ delete[] w;
31
+ _size = 1; N = p.N; w = new double[N+1]; _st = _f = 1;
32
+ }
33
+ for (size_t i = 0; i <= N; ++i) w[i] = (i <= p.N) ? p.w[i] : 0.0;
34
+ }
35
+ return *this;
36
+ }
37
+
38
+ template <> Real& Real::operator = (const varray<double>& v)
39
+ {
40
+ if (this != &v) {
41
+ size_t M = v.atom();
42
+ if (_f || w == NULL) {
43
+ delete[] w;
44
+ _size = 1; N = M; w = new double[N+1]; _st = _f = 1;
45
+ }
46
+ double *u = (double*)v;
47
+ for (size_t i = 0; i <= N; ++i) w[i] = (i <= M) ? u[i] : 0.0;
48
+ }
49
+ return *this;
50
+ }
51
+
52
+ template <> Real& Real::operator = (double a)
53
+ {
54
+ if (w == NULL) { _size = 1; N = MAXN; w = new double[N+1]; _st = _f = 1; }
55
+ w[0] = a; for (size_t i = 1; i <= N; ++i) w[i] = 0.0;
56
+ return *this;
57
+ }
58
+
59
+ void qd_add(size_t N, double *s, double *a, double b)
60
+ {
61
+ if (N == 0) { s[0] = a[0] + b; } else
62
+ if (N == 1) { s[0] = four_sum1(a[0], a[1], b, s[1]); } else
63
+ {
64
+ for (size_t i = 0; i <= N; ++i) s[i] = two_sum(a[i], b, b);
65
+ renorm(N, s, b);
66
+ }
67
+ }
68
+
69
+ void qd_minus(size_t N, double *s, double *a)
70
+ {
71
+ for (size_t i = 0; i <= N; ++i) s[i] = -a[i];
72
+ }
73
+
74
+ void qd_sub(size_t N, double *s, double *a, double b)
75
+ {
76
+ qd_add(N, s, a, -b);
77
+ }
78
+
79
+ void qd_add(size_t N, double *x, double *a, double *b)
80
+ {
81
+ if (N == 0) { x[0] = a[0] + b[0]; } else
82
+ if (N == 1) {
83
+ #ifdef IEEE_ADD
84
+ double s, e, t1, t2;
85
+ s = two_sum(a[0], b[0], e);
86
+ t1 = two_sum(a[1], b[1], t2);
87
+ e += t1;
88
+ s = quick_two_sum(s, e, e);
89
+ e += t2;
90
+ x[0] = quick_two_sum(s, e, x[1]);
91
+ #else
92
+ x[0] = four_sum2(a[0], a[1], b[0], b[1], x[1]);
93
+ #endif
94
+ } else {
95
+ #ifdef IEEE_ADD
96
+ double s, t, u, v, w[N+1]; /* double-length accumulator */
97
+ size_t i = 0, j = 0, k = 0;
98
+ if (fabs(a[i]) > fabs(b[j]))
99
+ u = a[i++];
100
+ else u = b[j++];
101
+ if (fabs(a[i]) > fabs(b[j]))
102
+ v = a[i++];
103
+ else v = b[j++];
104
+ u = quick_two_sum(u, v, v);
105
+ while (k <= N) {
106
+ if (i > N && j > N) { w[k++] = u; if (k <= N) w[k++] = v; break; }
107
+ if (i > N) t = b[j++]; else
108
+ if (j > N) t = a[i++]; else
109
+ if (fabs(a[i]) > fabs(b[j]))
110
+ t = a[i++];
111
+ else t = b[j++];
112
+ s = quick_three_accum(u, v, t);
113
+ if (s != 0.0) w[k++] = s;
114
+ } for (; k <= N; ++k) w[k] = 0.0;
115
+ /* add the rest. */
116
+ for (k = i; k <= N; k++) w[N] += a[k];
117
+ for (k = j; k <= N; k++) w[N] += b[k];
118
+ for (k = 0; k <= N; ++k) x[k] = w[k];
119
+ renorm(N, x);
120
+ #else
121
+ double t[N+1], e = 0;
122
+ for (size_t i = 0; i <= N; ++i) x[i] = two_sum(a[i], b[i], t[i]);
123
+ x[1] = two_sum(x[1], t[0], t[0]); // s1 = two_sum(s1, t0, t0);
124
+ for (size_t i = 2; i < N; ++i) {
125
+ three_sum(x[i], t[0], t[i-1]); // three_sum(s2, t0, t1);
126
+ e += t[i-1];
127
+ } three_sum2(x[N], t[0], t[N-1]); // three_sum2(s3, t0, t2);
128
+ e += t[0] + t[N]; // t0 = t0 + t1 + t3;
129
+ /* renormalize */
130
+ renorm(N, x, e);
131
+ #endif
132
+ }
133
+ }
134
+
135
+ void qd_sub(size_t N, double *x, double *a, double *b)
136
+ {
137
+ double c[N+1];
138
+ for (size_t i = 0; i <= N; ++i) c[i] = -b[i];
139
+ qd_add(N, x, a, c);
140
+ }
141
+
142
+ void qd_mul(size_t N, double *s, double *a, double b)
143
+ {
144
+ if (N == 0) { s[0] = a[0] * b; } else
145
+ if (N == 1) { s[0] = four_prod1(a[0], a[1], b, s[1]); } else
146
+ {
147
+ double p[2*N+1], *q = &p[N+1], e = 0;
148
+
149
+ for (size_t i = 0; i < N; ++i)
150
+ p[i] = two_prod(a[i], b, q[i]);
151
+ p[N] = a[N] * b;
152
+
153
+ s[0] = p[0]; // s0 = p0;
154
+ s[1] = two_sum(q[0], p[1], q[0]); // s1 = two_sum(q0, p1, s2);
155
+ for (size_t i = 2; i < N; ++i) {
156
+ three_sum(q[i-2], q[i-1], p[i]); // three_sum(s2, q1, p2);
157
+ s[i] = q[i-2]; e += p[i];
158
+ } three_sum2(q[N-2], q[N-1], p[N]); // three_sum2(q1, q2, p3);
159
+ s[N] = q[N-2]; e += q[N-1]; // s3 = q1;
160
+ // s4 = q2 + p2;
161
+ renorm(N, s, e);
162
+ }
163
+ }
164
+
165
+ /* Real * Real
166
+
167
+ p[0][0] = a0 * b0 p0, q0
168
+ p[1][0] = a0 * b1 p1, q1
169
+ p[1][1] = a1 * b0 p2, q2
170
+ p[2][0] = a0 * b2 p3, q3
171
+ p[2][1] = a1 * b1 p4, q4
172
+ p[2][2] = a2 * b0 p5, q5
173
+ p[3][0] = a0 * b3 p6, q6
174
+ p[3][1] = a1 * b2 p7, q7
175
+ p[3][2] = a2 * b1 p8, q8
176
+ p[3][3] = a3 * b0 p9, q9 */
177
+
178
+ void qd_mul(size_t N, double *x, double *a, double *b)
179
+ {
180
+ if (N == 0) { x[0] = a[0] * b[0]; } else
181
+ if (N == 1) { x[0] = four_prod2(a[0], a[1], b[0], b[1], x[1]); } else
182
+ {
183
+ #ifdef ACCURATE
184
+ const size_t H = N + 1;
185
+ #else
186
+ const size_t H = N;
187
+ #endif
188
+ size_t K = 0, s[H];
189
+ for (size_t i = 0; i < H; ++i) {
190
+ s[i] = i + 1; K += s[i];
191
+ }
192
+ double m[K*2];
193
+ double **p = create_carray_view(m, H, s);
194
+ double **q = create_carray_view(m + K, H, s);
195
+ for (size_t i = 0; i < H; ++i)
196
+ for (size_t j = 0; j <= i; ++j)
197
+ p[i][j] = two_prod(a[j], b[i-j], q[i][j]);
198
+ double &q0 = q[0][0]; // O(eps^0)
199
+ double &q1 = q[1][0]; // O(eps^1)
200
+ double &q2 = q[1][1];
201
+ /* O(eps^0) Accumulation */
202
+ // m[0] = p0;
203
+ // O(eps^1) 3_sum of p1, (q0), (p1, p2)
204
+ three_sum(m[1], m[2], q0);
205
+ // m[1] = p1, m[2] = p2;
206
+ // O(eps^2) 6_sum of p2, (q1, q2), (p3, p4, p5).
207
+ for (size_t i = 2; i < H; ++i) {
208
+ /* compute (s0, s1, s2) = (p2, q1, q2) + (p3, p4, p5). */
209
+ three_sum(m[i], q1, q2); // three_sum(p2, q1, q2);
210
+ for (size_t j = 2; j < i; ++j)
211
+ four_sum3(m[i], q1, q2, q[i-1][j]);
212
+ for (size_t j = 0; j < i+1; ++j)
213
+ four_sum3(m[i], q1, q2, p[i][j]); // three_sum(p3, p4, p5);
214
+ /* H == L ? O(eps^3) : O(eps^4) order terms*/
215
+ m[i+1] = q1 + q0; q0 = q2;
216
+ if (i < H-1) { q1 = q[i][0]; q2 = q[i][1]; }
217
+ }
218
+ // H == L ? O(eps^3) 8_sum of p3, (q3, q4, q5), (p6, p7, p8, p9). m[3] = p3;
219
+ // H != L ? O(eps^4) 9_sum of q0, p4, (q6, q7, q8, q9), (p10, p11, p12)
220
+
221
+ // H == L ? (q3, q4, q5) : (q6, q7, q8, q9)
222
+ for (size_t j = 0; j < H; j++) m[H] += q[H-1][j];
223
+ // H == L ? (p6, p7, p8, p9) : (p10, p11, p12)
224
+ for (size_t i = H-N; i <= N; ++i) m[H] += a[i] * b[H-i];
225
+ #ifdef ACCURATE
226
+ q0 += m[H];
227
+ #endif
228
+ for (size_t i = 0; i <= N; ++i) x[i] = m[i];
229
+ free((void*)q);
230
+ free((void*)p);
231
+ renorm(N, x, q0);
232
+ }
233
+ }
234
+
235
+ void qd_div(size_t N, double *q, double *a, double b)
236
+ {
237
+ if (N == 0) { q[0] = a[0] / b; } else
238
+ if (N == 1) {
239
+ double q1, q2, p1, p2, s, e;
240
+
241
+ q1 = a[0] / b; /* approximate quotient. */
242
+ /* Compute this - q1 * d */
243
+ p1 = two_prod(q1, b, p2);
244
+ s = two_diff(a[0], p1, e);
245
+ e += a[1];
246
+ e -= p2;
247
+ /* get next approximation. */
248
+ q2 = (s + e) / b;
249
+ /* renormalize */
250
+ q[0] = quick_two_sum(q1, q2, q[1]);
251
+ } else {
252
+ /* Strategy: compute approximate quotient using high order
253
+ doubles, and then correct it 3 times using the remainder.
254
+ (Analogous to long division.) */
255
+
256
+ double r[N+1], s[N+1], t[N+1];
257
+ for (size_t i = 0; i <= N; ++i) {
258
+ r[i] = a[i]; t[i] = 0.0;
259
+ }
260
+ for (size_t i = 0; i < N; ++i) {
261
+ q[i] = r[0] / b; /* approximate quotient */
262
+ t[0] = two_prod(q[i], b, t[1]); /* Compute the remainder a - q0 * b */
263
+ qd_sub(N, s, r, t);
264
+ for (size_t j = 0; j <= N; ++j) r[j] = s[j];
265
+ } q[N] = r[0] / b; /* Final correction to the quotient. */
266
+ renorm(N, q);
267
+ }
268
+
269
+ }
270
+
271
+ void qd_div(size_t N, double *q, double *a, double *b)
272
+ {
273
+ if (N == 0) { q[0] = a[0] / b[0]; } else
274
+ if (N == 1) {
275
+ #ifdef ACCURATE
276
+ double q0, q1, s[3], t[2];
277
+
278
+ q0 = a[0]; q1 = a[1];
279
+ for (size_t i = 0; i < 3; ++i) {
280
+ s[i] = q0 / b[0]; /* approximate quotient */
281
+ if (i < 2) {
282
+ // q -= s[i] * b;
283
+ t[0] = four_prod1(b[0], b[1], s[i], t[1]);
284
+ q0 = four_diff2(q0, q1, t[0], t[1], q1);
285
+ }
286
+ }
287
+ q0 = quick_two_sum(s[0], s[1], q1);
288
+ // q += s[2];
289
+ q[0] = four_sum1(q0, q1, s[2], q[1]);
290
+ #else
291
+ double s1, s2, q0, q1, r[2];
292
+
293
+ q0 = a[0] / b[0]; /* approximate quotient */
294
+ /* compute this - q0 * dd */
295
+ // Real r = b * q0;
296
+ r[0] = four_prod1(b[0], b[1], q0, r[1]);
297
+ s1 = two_diff(a[0], r[0], s2);
298
+ s2 -= r[1];
299
+ s2 += a[1];
300
+ /* get next approximation */
301
+ q1 = (s1 + s2) / b[0];
302
+ /* renormalize */
303
+ q[0] = quick_two_sum(q0, q1, q[1]);
304
+ #endif
305
+ } else {
306
+ double r[N+1], s[N+1];
307
+ for (size_t i = 0; i <= N; ++i) r[i] = a[i];
308
+ for (size_t i = 0; i <= N; ++i) {
309
+ q[i] = r[0] / b[0];
310
+ if (i < N) {
311
+ // r -= (b * q[i]);
312
+ qd_mul(N, s, b, q[i]);
313
+ qd_sub(N, s, r, s);
314
+ for (size_t i = 0; i <= N; ++i) r[i] = s[i];
315
+ }
316
+ }
317
+ #ifdef ACCURATE
318
+ // r -= (b * q[N]);
319
+ qd_mul(N, s, b, q[N]);
320
+ qd_sub(N, s, r, s);
321
+ for (size_t i = 0; i <= N; ++i) r[i] = s[i];
322
+ double q4 = r[0] / b[0];
323
+ renorm(N, q, q4);
324
+ #else
325
+ renorm(N, q);
326
+ #endif
327
+ }
328
+ }
329
+
330
+ bool qd_eq(size_t N, double *a, double *b)
331
+ {
332
+ size_t i;
333
+ for (i = 0; i <= N; ++i) if (a[i] != b[i]) break;
334
+ return (i > N);
335
+ }
336
+
337
+ bool qd_lt(size_t N, double *a, double *b)
338
+ {
339
+ size_t i;
340
+ for (i = 0; i <= N; ++i) if (a[i] != b[i]) break;
341
+ return (i <= N) && (a[i] < b[i]);
342
+ }
343
+
344
+ bool qd_gt(size_t N, double *a, double *b)
345
+ {
346
+ size_t i;
347
+ for (i = 0; i <= N; ++i) if (a[i] != b[i]) break;
348
+ return (i <= N) && (a[i] > b[i]);
349
+ }
350
+
351
+ bool qd_le(size_t N, double *a, double *b)
352
+ {
353
+ size_t i;
354
+ for (i = 0; i <= N; ++i) if (a[i] != b[i]) break;
355
+ return (i > N) || (a[i] < b[i]);
356
+ }
357
+
358
+ bool qd_ge(size_t N, double *a, double *b)
359
+ {
360
+ size_t i;
361
+ for (i = 0; i <= N; ++i) if (a[i] != b[i]) break;
362
+ return (i > N) || (a[i] > b[i]);
363
+ }
364
+
365
+ bool qd_eq(size_t N, double *a, double b)
366
+ {
367
+ size_t i = 0; bool f = (a[0] == b);
368
+ if (f) for (i = 1; i <= N; ++i) if (a[i] != 0.0) break;
369
+ return f && (i > N);
370
+ }
371
+
372
+ bool qd_lt(size_t N, double *a, double b)
373
+ {
374
+ size_t i = 0; bool f = (a[0] == b);
375
+ if (f) for (i = 1; i <= N; ++i) if (a[i] != 0.0) break;
376
+ return (a[0] < b || (i <= N && a[i] < 0.0));
377
+ }
378
+
379
+ bool qd_gt(size_t N, double *a, double b)
380
+ {
381
+ size_t i = 0; bool f = (a[0] == b);
382
+ if (f) for (i = 1; i <= N; ++i) if (a[i] != 0.0) break;
383
+ return (a[0] > b || (i <= N && a[i] > 0.0));
384
+ }
385
+
386
+ bool qd_le(size_t N, double *a, double b)
387
+ {
388
+ size_t i; bool f = (a[0] < b);
389
+ if (!f) for (i = 1; i <= N; ++i) if (a[i] != 0) break;
390
+ return (f || ((a[0] == b) && (i > N || a[i] < 0.0)));
391
+ }
392
+
393
+ bool qd_ge(size_t N, double *a, double b)
394
+ {
395
+ size_t i = 0; bool f = (a[0] > b);
396
+ if (!f) for (i = 1; i <= N; ++i) if (a[i] != 0.0) break;
397
+ return (f || ((a[0] == b) && (i > N || a[i] > 0.0)));
398
+ }
399
+
400
+ void qd_abs(size_t N, double *a, double *b)
401
+ {
402
+ if (qd_lt(N, b, 0.0)) qd_minus(N, a, b);
403
+ else
404
+ for (size_t i = 0; i <= N; ++i) a[i] = b[i];
405
+ }
406
+
407
+ Real sqrt(const Real& a)
408
+ {
409
+ /* Strategy:
410
+
411
+ Perform the following Newton iteration:
412
+
413
+ x' = x + (1 - a * x^2) * x / 2;
414
+
415
+ which converges to 1/sqrt(a), starting with the
416
+ double precision approximation to 1/sqrt(a).
417
+ Since Newton's iteration more or less doubles the
418
+ number of correct digits, we only need to perform it
419
+ twice.
420
+ */
421
+ const size_t N = a.atom();
422
+ Real r(N);
423
+
424
+ if (a.is_negative()) throw "(real::sqrt): Negative argument.";
425
+
426
+ if (!a.is_zero()) {
427
+ Real h = mul_pwr2(a, 0.5);
428
+ r = 1.0 / _sqrt(a(0));
429
+ for (size_t i = 0; i < N; ++i)
430
+ r += ((0.5 - h * sqr(r)) * r);
431
+ r *= a;
432
+ }
433
+ return r;
434
+ }
435
+
436
+ /* Computes the n-th root of a */
437
+ Real nroot(const Real& a, int n)
438
+ {
439
+ /* Strategy: Use Newton's iteration to solve
440
+
441
+ 1/(x^n) - a = 0
442
+
443
+ Newton iteration becomes
444
+
445
+ x' = x + x * (1 - a * x^n) / n
446
+
447
+ Since Newton's iteration converges quadratically,
448
+ we only need to perform it twice.
449
+ */
450
+ const size_t N = a.atom();
451
+ Real x(N);
452
+
453
+ if (n <= 0) throw "(real::nroot): N must be positive.";
454
+
455
+ if (n % 2 == 0 && a.is_negative())
456
+ throw "(real::nroot): Negative argument.";
457
+
458
+ if (n == 1) x = a; else
459
+ if (n == 2) x = sqrt(a); else
460
+ if (!a.is_zero()) {
461
+ /* Note a^{-1/n} = exp(-log(a)/n) */
462
+ auto r = abs(a);
463
+ x = _exp(-_log(r(0)) / n);
464
+ /* Perform Newton's iteration. */
465
+ double dbl_n = static_cast<double>(n);
466
+ for (size_t i = 0; i < N; ++i)
467
+ x += x * (1.0 - r * npwr(x, n)) / dbl_n;
468
+ if (a(0) < 0.0) x = -x;
469
+ x = 1.0 / x;
470
+ }
471
+ return x;
472
+ }
473
+
474
+ Real exp(const Real& a)
475
+ {
476
+ const int N = a.atom();
477
+ int n1 = 2 * N + 3, n2 = 2 * n1 - (N + 1) / 2;
478
+
479
+ /* Strategy: We first reduce the size of x by noting that
480
+
481
+ exp(kr + m * log(2)) = 2^m * exp(r)^k
482
+
483
+ where m and k are integers. By choosing m appropriately
484
+ we can make |kr| <= log(2) / 2 = 0.347. Then exp(r) is
485
+ evaluated using the familiar Taylor series. Reducing the
486
+ argument substantially speeds up the convergence. */
487
+
488
+ const double k = ldexp(1.0, n2);
489
+ const double inv_k = 1.0 / k;
490
+ Real result((size_t)N);
491
+
492
+ if (a(0) <= -709.0) result = 0.0; else
493
+
494
+ if (a(0) >= 709.0) result = REAL(_inf); else
495
+
496
+ if (a.is_zero() ) result = 1.0; else
497
+
498
+ if (a.is_one() ) result = REAL(_e); else
499
+
500
+ {
501
+ double m = _floor(a(0) / REAL(_log2)(0) + 0.5);
502
+ Real r = mul_pwr2(a - REAL(_log2) * m, inv_k);
503
+ Real s, p, t;
504
+ double thresh = inv_k * DOUBLE(_eps);
505
+
506
+ p = sqr(r);
507
+ s = r + mul_pwr2(p, 0.5);
508
+ int i = 0;
509
+ do {
510
+ p *= r;
511
+ t = p * real_inv_fact(N, i++);
512
+ s += t;
513
+ } while (fabs(t.to_d()) > thresh && i < n1+2);
514
+
515
+ for (int i = 0; i < n2; ++i)
516
+ s = mul_pwr2(s, 2.0) + sqr(s);
517
+ s += 1.0;
518
+ result = ldexp(s, static_cast<int>(m));
519
+ }
520
+ return result;
521
+ }
522
+
523
+ /* Logarithm. Computes log(x) in quad-double precision.
524
+ This is a natural logarithm (i.e., base e). */
525
+ Real log(const Real& a)
526
+ {
527
+ /* Strategy. The Taylor series for log converges much more
528
+ slowly than that of exp, due to the lack of the factorial
529
+ term in the denominator. Hence this routine instead tries
530
+ to determine the root of the function
531
+
532
+ f(x) = exp(x) - a
533
+
534
+ using Newton iteration. The iteration is given by
535
+
536
+ x' = x - f(x)/f'(x)
537
+ = x - (1 - a * exp(-x))
538
+ = x + a * exp(-x) - 1.
539
+
540
+ Two iteration is needed, since Newton's iteration
541
+ approximately doubles the number of digits per iteration. */
542
+
543
+ const size_t N = a.atom();
544
+ Real x(N);
545
+
546
+ if (a(0) < 0.0) throw "(poly::log): Non-positive argument.";
547
+ if (a(0) == 0.0) x = -REAL(_inf);
548
+ else if (!a.is_one())
549
+ {
550
+ x = _log(a(0)); /* Initial approximation */
551
+
552
+ for (size_t i = 0; i < N; ++i) x = x + a * exp(-x) - 1.0;
553
+ }
554
+ return x;
555
+ }
556
+
557
+ Real log10(const Real& a)
558
+ {
559
+ const size_t N = a.atom();
560
+ return log(a) / REAL(_log10);
561
+ }
562
+
563
+ /* Computes sin(a) and cos(a) using Taylor series.
564
+ Assumes |a| <= pi/2048. */
565
+ static void sincos_taylor(const Real& a, Real& sin_a, Real& cos_a)
566
+ {
567
+ const size_t N = a.atom();
568
+ const double thresh = 0.5 * DOUBLE(_eps) * fabs(a.to_d());
569
+ Real p, s, t, x;
570
+
571
+ if (a.is_zero()) {
572
+ sin_a = 0.0;
573
+ cos_a = 1.0;
574
+ return;
575
+ }
576
+
577
+ x = -sqr(a);
578
+ s = a;
579
+ p = a;
580
+ for (int i = 0; i < n_inv_fact; i += 2) {
581
+ p *= x;
582
+ t = p * real_inv_fact(N, i);
583
+ s += t;
584
+ if (fabs(t.to_d()) <= thresh) break;
585
+ }
586
+
587
+ sin_a = s;
588
+ cos_a = sqrt(1.0 - sqr(s));
589
+ }
590
+
591
+ static Real sin_taylor(const Real& a)
592
+ {
593
+ const size_t N = a.atom();
594
+ const double thresh = 0.5 * DOUBLE(_eps) * fabs(a.to_d());
595
+ Real p, s(N), t, x;
596
+
597
+ if (!a.is_zero())
598
+ {
599
+ x = -sqr(a);
600
+ s = a;
601
+ p = a;
602
+ for (int i = 0; i < n_inv_fact; i += 2) {
603
+ p *= x;
604
+ t = p * real_inv_fact(N, i);
605
+ s += t;
606
+ if (fabs(t.to_d()) <= thresh) break;
607
+ }
608
+ }
609
+ return s;
610
+ }
611
+
612
+ static Real cos_taylor(const Real& a)
613
+ {
614
+ const size_t N = a.atom();
615
+ const double thresh = 0.5 * DOUBLE(_eps);
616
+ Real p, s(N), t, x;
617
+
618
+ if (a.is_zero())
619
+ {
620
+ x = -sqr(a);
621
+ s = 1.0 + mul_pwr2(x, 0.5);
622
+ p = x;
623
+ for (int i = 1; i < n_inv_fact; i += 2) {
624
+ p *= x;
625
+ t = p * real_inv_fact(N, i);
626
+ s += t;
627
+ if (fabs(t.to_d()) <= thresh) break;
628
+ }
629
+ }
630
+ return s;
631
+ }
632
+
633
+ Real sin(const Real& a)
634
+ {
635
+ /* Strategy. To compute sin(x), we choose integers a, b so that
636
+
637
+ x = s + a * (pi/2) + b * (pi/1024)
638
+
639
+ and |s| <= pi/2048. Using a precomputed table of
640
+ sin(k pi / 1024) and cos(k pi / 1024), we can compute
641
+ sin(x) from sin(s) and cos(s). This greatly increases the
642
+ convergence of the sine Taylor series. */
643
+
644
+ const size_t N = a.atom();
645
+ Real r(N);
646
+ if (!a.is_zero())
647
+ {
648
+ // approximately reduce modulo 2*pi
649
+ Real z = nint(a / REAL(_2pi));
650
+ Real v = a - REAL(_2pi) * z;
651
+
652
+ // approximately reduce modulo pi/2 and then modulo pi/1024
653
+ double q = _floor(v(0) / REAL(_pi2)(0) + 0.5);
654
+ Real t = v - REAL(_pi2) * q;
655
+ int j = static_cast<int>(q);
656
+ q = _floor(t(0) / _pi1024[N](0) + 0.5);
657
+ t -= _pi1024[N] * q;
658
+ int k = static_cast<int>(q);
659
+ int abs_k = abs(k);
660
+
661
+ if (j < -2 || j > 2) throw "(poly::sin): Cannot reduce modulo pi/2.";
662
+
663
+ if (abs_k > 256) throw "(poly::sin): Cannot reduce modulo pi/1024.";
664
+
665
+ if (k == 0) switch (j) {
666
+ case 0: r = sin_taylor(t); break;
667
+ case 1: r = cos_taylor(t); break;
668
+ case -1: r = -cos_taylor(t); break;
669
+ default: r = -sin_taylor(t);
670
+ } else {
671
+ Real sin_t, cos_t;
672
+ Real u = real_cos_table(N, abs_k-1);
673
+ Real v = real_sin_table(N, abs_k-1);
674
+ sincos_taylor(t, sin_t, cos_t);
675
+
676
+ if (j == 0) {
677
+ if (k > 0) {
678
+ r = u * sin_t + v * cos_t;
679
+ } else {
680
+ r = u * sin_t - v * cos_t;
681
+ }
682
+ } else if (j == 1) {
683
+ if (k > 0) {
684
+ r = u * cos_t - v * sin_t;
685
+ } else {
686
+ r = u * cos_t + v * sin_t;
687
+ }
688
+ } else if (j == -1) {
689
+ if (k > 0) {
690
+ r = v * sin_t - u * cos_t;
691
+ } else {
692
+ r = - u * cos_t - v * sin_t;
693
+ }
694
+ } else {
695
+ if (k > 0) {
696
+ r = - u * sin_t - v * cos_t;
697
+ } else {
698
+ r = v * cos_t - u * sin_t;
699
+ }
700
+ }
701
+ }
702
+ }
703
+ return r;
704
+ }
705
+
706
+ Real cos(const Real& a)
707
+ {
708
+ const size_t N = a.atom();
709
+ Real r(1.0, N);
710
+
711
+ if (!a.is_zero())
712
+ {
713
+ // approximately reduce modulo 2*pi
714
+ Real z = nint(a / REAL(_2pi));
715
+ Real v = a - REAL(_2pi) * z;
716
+
717
+ // approximately reduce modulo pi/2 and then modulo pi/1024
718
+ double q = _floor(v(0) / REAL(_pi2)(0) + 0.5);
719
+ Real t = v - REAL(_pi2) * q;
720
+ int j = static_cast<int>(q);
721
+ q = _floor(t(0) / _pi1024[N](0) + 0.5);
722
+ t -= _pi1024[N] * q;
723
+ int k = static_cast<int>(q);
724
+ int abs_k = abs(k);
725
+
726
+ if (j < -2 || j > 2) throw "(poly::cos): Cannot reduce modulo pi/2.";
727
+
728
+ if (abs_k > 256) throw "(poly::cos): Cannot reduce modulo pi/1024.";
729
+
730
+ if (k == 0) switch (j) {
731
+ case 0: r = cos_taylor(t); break;
732
+ case 1: r = -sin_taylor(t); break;
733
+ case -1: r = sin_taylor(t); break;
734
+ default: r = -cos_taylor(t);
735
+ } else {
736
+
737
+ Real sin_t, cos_t;
738
+ sincos_taylor(t, sin_t, cos_t);
739
+
740
+ Real u = real_cos_table(N, abs_k-1);
741
+ Real v = real_sin_table(N, abs_k-1);
742
+
743
+ if (j == 0) {
744
+ if (k > 0) {
745
+ r = u * cos_t - v * sin_t;
746
+ } else {
747
+ r = u * cos_t + v * sin_t;
748
+ }
749
+ } else if (j == 1) {
750
+ if (k > 0) {
751
+ r = - u * sin_t - v * cos_t;
752
+ } else {
753
+ r = v * cos_t - u * sin_t;
754
+ }
755
+ } else if (j == -1) {
756
+ if (k > 0) {
757
+ r = u * sin_t + v * cos_t;
758
+ } else {
759
+ r = u * sin_t - v * cos_t;
760
+ }
761
+ } else {
762
+ if (k > 0) {
763
+ r = v * sin_t - u * cos_t;
764
+ } else {
765
+ r = - u * cos_t - v * sin_t;
766
+ }
767
+ }
768
+ }
769
+ }
770
+ return r;
771
+ }
772
+
773
+ void sincos(const Real& a, Real& sin_a, Real& cos_a)
774
+ {
775
+ const size_t N = a.atom();
776
+ if (a.is_zero()) {
777
+ sin_a = 0.0;
778
+ cos_a = 1.0;
779
+ return;
780
+ }
781
+
782
+ // approximately reduce by 2*pi
783
+ Real z = nint(a / REAL(_2pi));
784
+ Real t = a - REAL(_2pi) * z;
785
+
786
+ // approximately reduce by pi/2 and then by pi/1024.
787
+ double q = _floor(t(0) / REAL(_pi2)(0) + 0.5);
788
+ t -= REAL(_pi2) * q;
789
+ int j = static_cast<int>(q);
790
+ q = _floor(t(0) / _pi1024[N](0) + 0.5);
791
+ t -= _pi1024[N] * q;
792
+ int k = static_cast<int>(q);
793
+ int abs_k = abs(k);
794
+
795
+ if (j < -2 || j > 2) throw "(poly::sincos): Cannot reduce modulo pi/2.";
796
+
797
+ if (abs_k > 256) throw "(poly::sincos): Cannot reduce modulo pi/1024.";
798
+
799
+ Real sin_t, cos_t;
800
+ sincos_taylor(t, sin_t, cos_t);
801
+
802
+ if (k == 0) {
803
+ if (j == 0) {
804
+ sin_a = sin_t;
805
+ cos_a = cos_t;
806
+ } else if (j == 1) {
807
+ sin_a = cos_t;
808
+ cos_a = -sin_t;
809
+ } else if (j == -1) {
810
+ sin_a = -cos_t;
811
+ cos_a = sin_t;
812
+ } else {
813
+ sin_a = -sin_t;
814
+ cos_a = -cos_t;
815
+ }
816
+ return;
817
+ }
818
+
819
+ Real u = real_cos_table(N, abs_k-1);
820
+ Real v = real_sin_table(N, abs_k-1);
821
+
822
+ if (j == 0) {
823
+ if (k > 0) {
824
+ sin_a = u * sin_t + v * cos_t;
825
+ cos_a = u * cos_t - v * sin_t;
826
+ } else {
827
+ sin_a = u * sin_t - v * cos_t;
828
+ cos_a = u * cos_t + v * sin_t;
829
+ }
830
+ } else if (j == 1) {
831
+ if (k > 0) {
832
+ cos_a = - u * sin_t - v * cos_t;
833
+ sin_a = u * cos_t - v * sin_t;
834
+ } else {
835
+ cos_a = v * cos_t - u * sin_t;
836
+ sin_a = u * cos_t + v * sin_t;
837
+ }
838
+ } else if (j == -1) {
839
+ if (k > 0) {
840
+ cos_a = u * sin_t + v * cos_t;
841
+ sin_a = v * sin_t - u * cos_t;
842
+ } else {
843
+ cos_a = u * sin_t - v * cos_t;
844
+ sin_a = - u * cos_t - v * sin_t;
845
+ }
846
+ } else {
847
+ if (k > 0) {
848
+ sin_a = - u * sin_t - v * cos_t;
849
+ cos_a = v * sin_t - u * cos_t;
850
+ } else {
851
+ sin_a = v * cos_t - u * sin_t;
852
+ cos_a = - u * cos_t - v * sin_t;
853
+ }
854
+ }
855
+ }
856
+
857
+ Real atan(const Real& a)
858
+ {
859
+ const size_t N = a.atom();
860
+ return atan2(a, Real(1.0, N));
861
+ }
862
+
863
+ Real atan2(const Real &y, const Real &x)
864
+ {
865
+ /* Strategy: Instead of using Taylor series to compute
866
+ arctan, we instead use Newton's iteration to solve
867
+ the equation
868
+
869
+ sin(z) = y/r or cos(z) = x/r
870
+
871
+ where r = sqrt(x^2 + y^2).
872
+ The iteration is given by
873
+
874
+ z' = z + (y - sin(z)) / cos(z) (for equation 1)
875
+ z' = z - (x - cos(z)) / sin(z) (for equation 2)
876
+
877
+ Here, x and y are normalized so that x^2 + y^2 = 1.
878
+ If |x| > |y|, then first iteration is used since the
879
+ denominator is larger. Otherwise, the second is used.
880
+ */
881
+ const size_t N = y.atom();
882
+ #ifndef NDEBUG
883
+ const size_t M = x.atom();
884
+ #endif
885
+ assert(N == M);
886
+ if (x.is_zero()) {
887
+ if (y.is_zero()) /* Both x and y is zero. */
888
+ throw "(poly::atan2): Both arguments zero.";
889
+ return (y.is_positive()) ? REAL(_pi2) : -REAL(_pi2);
890
+ } else if (y.is_zero()) {
891
+ return (x.is_positive()) ? Real(0.0) : REAL(_pi);
892
+ }
893
+
894
+ if (x == y) return (y.is_positive()) ? REAL(_pi4) : -REAL(_3pi4);
895
+
896
+ if (x == -y) return (y.is_positive()) ? REAL(_3pi4) : -REAL(_pi4);
897
+
898
+ Real r = sqrt(sqr(x) + sqr(y));
899
+ Real xx = x / r;
900
+ Real yy = y / r;
901
+
902
+ /* Compute double precision approximation to atan. */
903
+ Real z(_atan2(y.to_d(), x.to_d()), N);
904
+ Real sin_z, cos_z;
905
+
906
+ if (fabs(xx(0)) > fabs(yy(0))) {
907
+ /* Use Newton iteration 1. z' = z + (y - sin(z)) / cos(z) */
908
+ for (size_t i = 0; i < N; ++i) {
909
+ sincos(z, sin_z, cos_z);
910
+ z += (yy - sin_z) / cos_z;
911
+ }
912
+ } else {
913
+ /* Use Newton iteration 2. z' = z - (x - cos(z)) / sin(z) */
914
+ for (size_t i = 0; i < N; ++i) {
915
+ sincos(z, sin_z, cos_z);
916
+ z -= (xx - cos_z) / sin_z;
917
+ }
918
+ }
919
+ return z;
920
+ }
921
+
922
+ Real drem(const Real& a, const Real& b)
923
+ {
924
+ Real n = nint(a/b);
925
+ return (a - n * b);
926
+ }
927
+
928
+ Real divrem(const Real &a, const Real &b, Real &r)
929
+ {
930
+ Real n = nint(a/b);
931
+ r = a - n * b;
932
+ return n;
933
+ }
934
+
935
+ Real tan(const Real &a)
936
+ {
937
+ Real s, c;
938
+ sincos(a, s, c);
939
+ return s/c;
940
+ }
941
+
942
+ Real asin(const Real &a)
943
+ {
944
+ const size_t N = a.atom();
945
+ Real abs_a = abs(a);
946
+
947
+ if (abs_a > 1.0) throw "(poly::asin): Argument out of domain.";
948
+ if (abs_a.is_one())
949
+ return (a.is_positive()) ? REAL(_pi2) : -REAL(_pi2);
950
+ return atan2(a, sqrt(1.0 - sqr(a)));
951
+ }
952
+
953
+ Real acos(const Real &a)
954
+ {
955
+ const size_t N = a.atom();
956
+ Real abs_a = abs(a);
957
+
958
+ if (abs_a > 1.0) throw "(poly::acos): Argument out of domain.";
959
+ if (abs_a.is_one())
960
+ return (a.is_positive()) ? Real(0.0) : REAL(_pi);
961
+ return atan2(sqrt(1.0 - sqr(a)), a);
962
+ }
963
+
964
+ Real sinh(const Real &a)
965
+ {
966
+ const size_t N = a.atom();
967
+ Real z(N);
968
+
969
+ if (!a.is_zero())
970
+ {
971
+ if (abs(a) > 0.05) {
972
+ Real ea = exp(a);
973
+ return mul_pwr2(ea - inv(ea), 0.5);
974
+ }
975
+
976
+ /* Since a is small, using the above formula gives
977
+ a lot of cancellation. So use Taylor series. */
978
+ Real s = a;
979
+ Real t = a;
980
+ Real r = sqr(t);
981
+ double m = 1.0;
982
+ double thresh = fabs(a.to_d() * DOUBLE(_eps));
983
+
984
+ do {
985
+ m += 2.0;
986
+ t *= r;
987
+ t /= (m-1) * m;
988
+ s += t;
989
+ } while (abs(t) > thresh);
990
+ z = s;
991
+ }
992
+ return z;
993
+ }
994
+
995
+ Real cosh(const Real &a)
996
+ {
997
+ const size_t N = a.atom();
998
+ Real z(1.0, N);
999
+
1000
+ if (!a.is_zero())
1001
+ {
1002
+ Real ea = exp(a);
1003
+ z = mul_pwr2(ea + inv(ea), 0.5);
1004
+ }
1005
+ return z;
1006
+ }
1007
+
1008
+ Real tanh(const Real &a)
1009
+ {
1010
+ const size_t N = a.atom();
1011
+ Real z(N);
1012
+
1013
+ if (!a.is_zero())
1014
+ {
1015
+ if (fabs(a.to_d()) > 0.05) {
1016
+ Real ea = exp(a);
1017
+ Real inv_ea = inv(ea);
1018
+ z = (ea - inv_ea) / (ea + inv_ea);
1019
+ } else {
1020
+ Real s, c;
1021
+ s = sinh(a);
1022
+ c = sqrt(1.0 + sqr(s));
1023
+ z = s / c;
1024
+ }
1025
+ }
1026
+ return z;
1027
+ }
1028
+
1029
+ void sincosh(const Real &a, Real &s, Real &c)
1030
+ {
1031
+ if (fabs(a.to_d()) <= 0.05) {
1032
+ s = sinh(a);
1033
+ c = sqrt(1.0 + sqr(s));
1034
+ } else {
1035
+ Real ea = exp(a);
1036
+ Real inv_ea = inv(ea);
1037
+ s = mul_pwr2(ea - inv_ea, 0.5);
1038
+ c = mul_pwr2(ea + inv_ea, 0.5);
1039
+ }
1040
+ }
1041
+
1042
+ Real asinh(const Real &a)
1043
+ {
1044
+ return log(a + sqrt(sqr(a) + 1.0));
1045
+ }
1046
+
1047
+ Real acosh(const Real &a)
1048
+ {
1049
+ if (a < 1.0) throw "(poly::acosh): Argument out of domain.";
1050
+ return log(a + sqrt(sqr(a) - 1.0));
1051
+ }
1052
+
1053
+ Real atanh(const Real &a)
1054
+ {
1055
+ if (abs(a) >= 1.0) throw "(poly::atanh): Argument out of domain.";
1056
+ return mul_pwr2(log((1.0 + a) / (1.0 - a)), 0.5);
1057
+ }
1058
+
1059
+ Real fmod(const Real &a, const Real &b)
1060
+ {
1061
+ Real n = aint(a / b);
1062
+ return (a - b * n);
1063
+ }
1064
+
1065
+ /* polyeval(c, n, x)
1066
+ Evaluates the given n-th degree polynomial at x.
1067
+ The polynomial is given by the array of (n+1) coefficients.
1068
+ */
1069
+ Real polyeval(const Real *c, int n, const Real &x)
1070
+ {
1071
+ /* Just use Horner's method of polynomial evaluation. */
1072
+ Real r = c[n];
1073
+
1074
+ for (int i = n-1; i >= 0; i--) {
1075
+ r *= x;
1076
+ r += c[i];
1077
+ }
1078
+ return r;
1079
+ }
1080
+
1081
+ /* polyroot(c, n, x0)
1082
+ Given an n-th degree polynomial, finds a root close to
1083
+ the given guess x0. Note that this uses simple Newton
1084
+ iteration scheme, and does not work for multiple roots.
1085
+ */
1086
+ Real polyroot(const Real *c, int n, const Real& x0, int max_iter, double thresh)
1087
+ {
1088
+ const size_t N = x0.atom();
1089
+ Real x = x0;
1090
+ Real f;
1091
+ Real *d = new Real[n];
1092
+ bool conv = false;
1093
+ int i;
1094
+ double max_c = fabs(c[0].to_d());
1095
+ double v;
1096
+
1097
+ if (thresh == 0.0) thresh = DOUBLE(_eps);
1098
+
1099
+ /* Compute the coefficients of the derivatives. */
1100
+ for (i = 1; i <= n; i++) {
1101
+ v = fabs(c[i].to_d());
1102
+ if (v > max_c) max_c = v;
1103
+ d[i-1] = c[i] * static_cast<double>(i);
1104
+ }
1105
+ thresh *= max_c;
1106
+
1107
+ /* Newton iteration. */
1108
+ for (i = 0; i < max_iter; i++) {
1109
+ f = polyeval(c, n, x);
1110
+
1111
+ if (abs(f) < thresh) {
1112
+ conv = true;
1113
+ break;
1114
+ }
1115
+ x -= (f / polyeval(d, n-1, x));
1116
+ }
1117
+ delete [] d;
1118
+
1119
+ if (!conv) {
1120
+ const char *msg = "(poly::polyroot): Failed to converge.";
1121
+ fprintf(stderr, "ERROR %s\n", msg);
1122
+ x = REAL(_nan);
1123
+ }
1124
+ return x;
1125
+ }
1126
+
1127
+ /* Read a quad-double from s. */
1128
+ template <> int Real::read(const char *s, Real& qd)
1129
+ {
1130
+ const size_t N = qd.N;
1131
+ const char *p = s;
1132
+ char ch;
1133
+ int sign = 0;
1134
+ int point = -1; /* location of decimal point */
1135
+ int nd = 0; /* number of digits read */
1136
+ int e = 0; /* exponent. */
1137
+ bool done = false;
1138
+ Real r(N); /* number being read */
1139
+
1140
+ /* Skip any leading spaces */
1141
+ while (*p == ' ') p++;
1142
+
1143
+ while (!done && (ch = *p) != '\0') {
1144
+ if (ch >= '0' && ch <= '9') {
1145
+ /* It's a digit */
1146
+ int d = ch - '0';
1147
+ r *= 10.0;
1148
+ r += static_cast<double>(d);
1149
+ nd++;
1150
+ } else {
1151
+ /* Non-digit */
1152
+ switch (ch) {
1153
+ case '.':
1154
+ if (point >= 0) return -1; /* we've already encountered a decimal point. */
1155
+ point = nd;
1156
+ break;
1157
+ case '-':
1158
+ case '+':
1159
+ if (sign != 0 || nd > 0) return -1; /* we've already encountered a sign, or if its not at first position. */
1160
+ sign = (ch == '-') ? -1 : 1;
1161
+ break;
1162
+ case 'E':
1163
+ case 'e':
1164
+ int nread;
1165
+ nread = sscanf(p+1, "%d", &e);
1166
+ done = true;
1167
+ if (nread != 1) return -1; /* read of exponent failed. */
1168
+ break;
1169
+ case ' ':
1170
+ done = true;
1171
+ break;
1172
+ default:
1173
+ return -1;
1174
+ }
1175
+ }
1176
+ p++;
1177
+ }
1178
+
1179
+ /* Adjust exponent to account for decimal point */
1180
+ if (point >= 0) { e -= (nd - point); }
1181
+
1182
+ /* Multiply the the exponent */
1183
+ while (e != 0) if (e > 0) { r *= 10.0; e--; } else { r /= 10.0; e++; }
1184
+
1185
+ qd = (sign < 0) ? -r : r;
1186
+ return 0;
1187
+ }
1188
+
1189
+ template <> Real& Real::operator = (const char *s)
1190
+ {
1191
+ if (w == NULL) { _size = 1; N = MAXN; w = new double[N+1]; _st = _f = 1; }
1192
+ if (read(s, *this)) throw "(poly::operator=): INPUT ERROR.";
1193
+ return *this;
1194
+ }
1195
+
1196
+ static char *to_str(const Real& val, char *str, int& exp, size_t& len)
1197
+ {
1198
+ size_t A = val.atom();
1199
+ static const Real t3(1.0e100, A);
1200
+ static const Real t2(1.0e10, A);
1201
+ static const Real t1(10.0, A);
1202
+ static const Real t0(0.0, A);
1203
+ static const Real s1(1.0, A);
1204
+ static const Real s2(1.0e-10, A);
1205
+ static const Real s3(1.0e-100,A);
1206
+ int i, sign = sgn(val), n = _ndigits(A) + 2;
1207
+ Real r = pow(t1, 1 - n);
1208
+ Real m = abs(val);
1209
+ char *b = str, *c = NULL;
1210
+ len = 0; exp = 0;
1211
+ if (m > s1) {
1212
+ while (m > t3) { m /= t3; exp += 100; }
1213
+ while (m > t2) { m /= t2; exp += 10; }
1214
+ while (m > t1) { m /= t1; exp++; }
1215
+ } else if (m > t0) {
1216
+ while (m < s3) { m *= t3; exp -= 100; }
1217
+ while (m < s2) { m *= t2; exp -= 10; }
1218
+ while (m < s1) { m *= t1; exp--; }
1219
+ }
1220
+ *b++ = '0'; *b++ = '0';
1221
+ for (i = 0; i < n; ++i) {
1222
+ Real t = floor(m); m -= t; m *= t1;
1223
+ *b++ = t.to_i() + '0';
1224
+ r *= t1; if (i < n-1 && m < r) break;
1225
+ }
1226
+ char d = '9';
1227
+ if (i == n) d = '4';
1228
+ if (*(b-1) > d) c = (--b) - 1;
1229
+ while (c) {
1230
+ (*c)++;
1231
+ if (*c > '9') { c--; b--; }
1232
+ else c = NULL;
1233
+ } *b = '\0';
1234
+ c = str; if (*c == '0') c++;
1235
+ if (*c == '0') c++; else exp++;
1236
+ if (sign < 0) *--c = '-';
1237
+ len = b - c;
1238
+ return c;
1239
+ }
1240
+
1241
+ static void append_expn(char *str, int expn)
1242
+ {
1243
+ int k;
1244
+ *str++ = (expn < 0 ? '-' : '+');
1245
+ expn = abs(expn);
1246
+ if (expn >= 100) {
1247
+ k = (expn / 100);
1248
+ *str++ = '0' + k;
1249
+ expn -= 100 * k;
1250
+ }
1251
+ k = (expn / 10);
1252
+ *str++ = '0' + k;
1253
+ expn -= 10 * k;
1254
+ *str++ = '0' + expn;
1255
+ *str = '\0';
1256
+ }
1257
+
1258
+ template <> char *Real::to_string(int precision, int width,
1259
+ rios::fmtflags fmt, bool showpos, bool uppercase, char fill) const
1260
+ {
1261
+ if (isnan(w[0])) return const_cast<char*>("nan");
1262
+ if (isinf(w[0])) return const_cast<char*>("inf");
1263
+ size_t str_size, blank, buf_size = _ndigits(this->atom()) + 5;
1264
+ char *buf = new char[buf_size];
1265
+ int exp, exponent;
1266
+ char *s = to_str(*this, buf, exponent, str_size);
1267
+ bool sign = (*s == '-');
1268
+
1269
+ bool fixed = (fmt & rios::fixed) != 0;
1270
+ str_size = 2;
1271
+ if (sign || showpos) str_size++;
1272
+ str_size += precision; if (precision > 0) str_size++;;
1273
+ if (fixed) {
1274
+ if (exponent > 0) str_size += exponent;
1275
+ buf_size = str_size;
1276
+ } else {
1277
+ buf_size = str_size + 5;
1278
+ }
1279
+ blank = width + 1;
1280
+ if (buf_size < blank) {
1281
+ blank -= buf_size;
1282
+ buf_size = width + 1;
1283
+ } else blank = 0;
1284
+ char *str = new char[buf_size+1];
1285
+ char *b = str; bool dot = true;
1286
+ while (blank > 0) { *b++ = fill; blank--; }
1287
+ if (sign) { *b++ = *s++; str_size--; }
1288
+ else if (showpos) { *b++ = '+'; str_size--; }
1289
+ char *c = b; exp = fixed ? exponent : 0;
1290
+ for (size_t i = 0; i < str_size; ++i) {
1291
+ if (exp < 0) {
1292
+ *b++ = '0'; ++exp;
1293
+ if (i == 0) { *b++ = '.'; i++; dot = false; }
1294
+ } else if (exp > 0) {
1295
+ if (*s) *b++ = *s++; else *b++ = '0';
1296
+ dot = (--exp == 0);
1297
+ } else {
1298
+ if (*s) *b++ = *s++; else *b++ = '0';
1299
+ if (dot) { *b++ = '.'; i++; dot = false; }
1300
+ }
1301
+ } char d = *s;
1302
+ s = str;
1303
+ if (*--b >= '5' || (*b == '.' && d >= '5')) {
1304
+ do {
1305
+ if (*b != '.') *b = '0';
1306
+ if (*--b == '.') --b;
1307
+ (*b)++;
1308
+ } while (b > c && *b > '9');
1309
+ if (*c > '9') {
1310
+ size_t i; *c = '1';
1311
+ for (i = 1; i < str_size-1; ++i)
1312
+ if (c[i] == '.') {
1313
+ c[i] = '0'; c[i+1] = '.';
1314
+ c[str_size++] = '0';
1315
+ if (s[0] == fill) s++;
1316
+ break;
1317
+ }
1318
+ if (c[str_size-1] == '.' && i == str_size-1 ) {
1319
+ c[str_size-1] = '0'; str_size++;
1320
+ if (s[0] == fill) s++;
1321
+ }
1322
+ }
1323
+ }
1324
+ b = c + str_size - 1; *b = '\0';
1325
+ if (!fixed) {
1326
+ *b++ = uppercase ? 'E' : 'e';
1327
+ append_expn(b, exponent);
1328
+ while(*b)++b;
1329
+ }
1330
+ str_size = b - s;
1331
+ char *result = new char[str_size + 1];
1332
+ strcpy(result, s);
1333
+ delete[] str;
1334
+ delete[] buf;
1335
+ return result;
1336
+ }
1337
+
1338
+ template <> int Real::write(char *s, int len, int precision,
1339
+ rios::fmtflags fmt, bool showpos, bool uppercase, char fill) const
1340
+ {
1341
+ char *str = this->to_string(precision, 0, fmt, showpos, uppercase, fill);
1342
+ int length = strlen(str);
1343
+ if (len > 0) {
1344
+ strncpy(s, str, len-1);
1345
+ s[len-1] = 0;
1346
+ } delete[] str;
1347
+ return length;
1348
+ }
1349
+
1350
+ template <> char *Real::to_a(const char *f) const
1351
+ {
1352
+ str_buf c(80);
1353
+ c += '<';
1354
+ for (size_t i = 0; i <= N; i++) {
1355
+ sprintg(c, w[i], f);
1356
+ if (i < N) c += ',';
1357
+ }
1358
+ c += '>';
1359
+ char *result = new char[c.size() + 1];
1360
+ strcpy(result, c.c_str());
1361
+ return result;
1362
+ }
1363
+
1364
+ template <> char *Real::to_s(const char *f, char fill) const
1365
+ {
1366
+ if (isinf(w[0])) { char *s = new char[4]; sprintf(s, "inf"); return s; }
1367
+ if (isnan(w[0])) { char *s = new char[4]; sprintf(s, "nan"); return s; }
1368
+ int len = 0, prec = 0;
1369
+ char *a, *b = NULL, *c = const_cast<char*>(f);
1370
+ while(*c != '%')++c;++c; a = c;
1371
+ while((*c >= '0' && *c <= '9') || *c == '.') { if (*c == '.') b = c + 1; ++c; }
1372
+ len = atoi(a); prec = b ? atoi(b) : 6;
1373
+ rios::fmtflags fmt = rios::scientific;
1374
+ if (*c == 'f') fmt = rios::fixed;
1375
+ if (*c == 'g') {
1376
+ int d = floor(log10(fabs(w[0])));
1377
+ if (abs(d) <= prec) fmt = rios::fixed;
1378
+ }
1379
+ int size = write(NULL, 0, prec, fmt);
1380
+ int length = len > size ? len : size;
1381
+ char *str = new char[length+1];
1382
+ write(str, length+1, prec, fmt);
1383
+ if (*c == 'g' && (fmt & rios::fixed)) {
1384
+ c = str; b = NULL;
1385
+ while (*c == '-' || *c == '+') ++c;
1386
+ while((*c >= '0' && *c <= '9') || *c == '.') { if (*c == '.') b = c; ++c; }
1387
+ if (b) {
1388
+ while (*(c-1) == '0') --c; *c = '\0';
1389
+ if (*(b+1) == '\0') *b = '\0';
1390
+ }
1391
+ } size = strlen(str);
1392
+ if (len > size) {
1393
+ c = str + len; b = str + size;
1394
+ for (int i = len; i >= 0; --i) {
1395
+ if (i >= len - size) *c-- = *b--;
1396
+ else *c-- = fill;
1397
+ }
1398
+ }
1399
+ return str;
1400
+ }
1401
+
1402
+ template <> char *Vector::to_s(const char *f) const
1403
+ {
1404
+ str_buf c(80);
1405
+ c += '[';
1406
+ for (size_t i = 0; i < _size; ++i) {
1407
+ Real v = (*this)[i];
1408
+ char *b = v.to_s(f);
1409
+ c += b;
1410
+ if (i < _size-1) c += ", ";
1411
+ delete[] b;
1412
+ }
1413
+ c += ']';
1414
+ char *s = new char[c.size()+1];
1415
+ strcpy(s, c.c_str());
1416
+ return s;
1417
+ }
1418
+
1419
+ template <> char *Matrix::to_s(const char *f) const
1420
+ {
1421
+ str_buf e(80);
1422
+ sprintf(e.c_buf(), "<{%ld, %ld}, %ld, [", r, c, a); ++e;
1423
+ for (size_t i = 0; i < r; ++i) {
1424
+ Vector v = row(i);
1425
+ char *b = v.to_s(f);
1426
+ e += b;
1427
+ if (i < r-1) e += ',';
1428
+ delete[] b;
1429
+ } e += "]>";
1430
+ char *s = new char[e.size()+1];
1431
+ strcpy(s, e.c_str());
1432
+ return s;
1433
+ }
1434
+
1435
+ template <> char *Matrix_view::to_s(const char *f) const
1436
+ {
1437
+ str_buf e(80);
1438
+ sprintf(e.c_buf(), "<{%ld, %ld}, %ld, @[", r, c, a); ++e;
1439
+ for (size_t i = 0; i < r; ++i) {
1440
+ Vector v = row(i);
1441
+ char *b = v.to_s(f);
1442
+ e += b;
1443
+ if (i < r-1) e += ',';
1444
+ delete[] b;
1445
+ } e += "]>";
1446
+ char *s = new char[e.size()+1];
1447
+ strcpy(s, e.c_str());
1448
+ return s;
1449
+ }
1450
+
1451
+ #ifdef USE_RANDOM
1452
+ template <> Real Real::qdrand(size_t N)
1453
+ {
1454
+ static std::random_device rnd;
1455
+ static std::mt19937 mt(rnd());
1456
+ static const double m_const = 4.6566128730773926e-10; /* = 2^{-31} */
1457
+ double m = m_const;
1458
+ Real r(N);
1459
+
1460
+ /* Strategy: Generate 31 bits at a time, using lrand48
1461
+ random number generator. Shift the bits, and repeat
1462
+ 7 times. */
1463
+
1464
+ for (int i = 0; i < 7; i++, m *= m_const) {
1465
+ double d = mt() * m;
1466
+ r += d;
1467
+ }
1468
+ return r - 1.0;
1469
+ }
1470
+
1471
+ template <> Real Real::debug_rand(size_t N)
1472
+ {
1473
+ static std::random_device rnd;
1474
+ static std::mt19937 mt(rnd());
1475
+ if (std::rand() % 2 == 0) return qdrand(N);
1476
+
1477
+ int expn = 0;
1478
+ Real a(N);
1479
+ double d;
1480
+ for (int i = 0; i < 4; i++) {
1481
+ d = std::ldexp(mt() / static_cast<double>(RAND_MAX), -expn);
1482
+ a += d;
1483
+ expn = expn + 54 + mt() % 200;
1484
+ }
1485
+ return a;
1486
+ }
1487
+
1488
+ template <> Real Real::uniform_rand(size_t N, double a, double b)
1489
+ {
1490
+ static std::random_device rnd;
1491
+ static std::mt19937_64 mt(rnd());
1492
+ std::uniform_real_distribution<> dist(a, b);
1493
+ Real d(N);
1494
+ for (size_t i = 0; i <= N; i++) d[i] = dist(mt);
1495
+ d.normalize(dist(mt));
1496
+ return d / static_cast<double>(N+2);
1497
+ }
1498
+
1499
+ template <> Real Real::normal_rand(size_t N, double a, double b)
1500
+ {
1501
+ static std::random_device rnd;
1502
+ static std::mt19937_64 mt(rnd());
1503
+ std::normal_distribution<> dist(a, b);
1504
+ Real d(N);
1505
+ for (size_t i = 0; i <= N; i++) d[i] = dist(mt);
1506
+ d.normalize(dist(mt));
1507
+ return d / static_cast<double>(N+2);
1508
+ }
1509
+ #endif
1510
+
1511
+ void debug_sin_table(real_precision N)
1512
+ {
1513
+ char fmt[80];
1514
+ sprintf(fmt, "%%.%de\n", _ndigits(N));
1515
+ for (int i = 0; i < 256; ++i) {
1516
+ Real s = real_sin_table(N, i);
1517
+ s.print(fmt);
1518
+ }
1519
+ }
1520
+
1521
+ void debug_cos_table(real_precision N)
1522
+ {
1523
+ char fmt[80];
1524
+ sprintf(fmt, "%%.%de\n", _ndigits(N));
1525
+ for (int i = 0; i < 256; ++i) {
1526
+ Real s = real_cos_table(N, i);
1527
+ s.print(fmt);
1528
+ }
1529
+ }
1530
+
1531
+ void debug_inv_fact(real_precision N)
1532
+ {
1533
+ char fmt[80];
1534
+ sprintf(fmt, "%%.%de\n", _ndigits(N));
1535
+ for (int i = 0; i < 15; ++i) {
1536
+ Real s = real_inv_fact(N, i);
1537
+ s.print(fmt);
1538
+ }
1539
+ }
1540
+
1541
+ real_const<double> _2pi (real_const_table[0]);
1542
+ real_const<double> _pi (real_const_table[1]);
1543
+ real_const<double> _pi2 (real_const_table[2]);
1544
+ real_const<double> _pi4 (real_const_table[3]);
1545
+ real_const<double> _3pi4 (real_const_table[4]);
1546
+ real_const<double> _e (real_const_table[5]);
1547
+ real_const<double> _log2 (real_const_table[6]);
1548
+ real_const<double> _log10 (real_const_table[7]);
1549
+ real_const<double> _max (real_const_table[8]);
1550
+ real_const<double> _safe_max(real_const_table[9]);
1551
+ real_const<double> _nan (real_const_table[10]);
1552
+ real_const<double> _inf (real_const_table[11]);
1553
+ real_const<double> _min_normalized(real_const_table[12]);
1554
+ real_const<double> _eps (real_const_table[13]);
1555
+ real_const<double> _pi1024 (real_const_table[14]);
1556
+ real_const<int> _ndigits (real_const_ndigits);
1557
+
1558
+ template <> inline int sprintg(str_buf& c, const Real& v, const char *f)
1559
+ {
1560
+ int width = 0, prec = 5; rios::fmtflags fmt = rios::scientific;
1561
+ if (f && *f == '%') {
1562
+ char *b = const_cast<char*>(f) + 1, *a = NULL, *c = b;
1563
+ while ((*c >= '0' && *c <= '9') || *c == '.') { if (*c == '.') a = c + 1; ++c; }
1564
+ if (*c >= 'e' && *c <= 'g') { width = atoi(b); if (a) prec = atoi(a); }
1565
+ if (*c == 'f') fmt = rios::fixed; else
1566
+ if (*c == 'g') {
1567
+ int d = floor(log10(fabs(v(0))));
1568
+ if (abs(d) <= prec) fmt = rios::fixed;
1569
+ }
1570
+ }
1571
+ char *str = v.to_string(prec, width, fmt, false, false);
1572
+ if (fmt == rios::fixed) {
1573
+ char *s = str; while(*s)++s;
1574
+ do { if (*--s != '0') break; *s = '\0'; } while (s != str);
1575
+ if (*s == '.') *s = '\0';
1576
+ }
1577
+ c += str;
1578
+ int n = strlen(str);
1579
+ delete[] str;
1580
+ return n;
1581
+ }
1582
+
1583
+ template <> void poly_to_s(str_buf& c, int N, const int *size, const double *data, const char *f)
1584
+ {
1585
+ int n = *size;
1586
+ if (N == 0) {
1587
+ if (n == 0) {
1588
+ sprintg<double>(c, *data, f);
1589
+ } else {
1590
+ c += '<';
1591
+ Real a(size_t(n), data, 0);
1592
+ sprintg<Real>(c, a, f);
1593
+ c += '>';
1594
+ }
1595
+ }
1596
+ else {
1597
+ c += (N == 1) ? '(' : '[';
1598
+ int m = size[N] + 1;
1599
+ for (int i = N; i > 1; --i) m *= size[i-1];
1600
+ for (int i = 0; i < n; ++i) {
1601
+ poly_to_s(c, N-1, size+1, &data[i*m], f);
1602
+ if (i < n-1) c += ',';
1603
+ }
1604
+ c += (N == 1) ? ')' : ']';
1605
+ }
1606
+ }
1607
+