pspline 5.0.5 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/README.md +44 -43
  4. data/Rakefile +6 -6
  5. data/bin/console +14 -14
  6. data/bin/setup +8 -8
  7. data/ext/pspline/basis.cpp +394 -351
  8. data/ext/pspline/example/exbspline.rb +57 -57
  9. data/ext/pspline/example/excspline.rb +57 -57
  10. data/ext/pspline/example/exdspline.rb +55 -55
  11. data/ext/pspline/example/exfspline.rb +44 -44
  12. data/ext/pspline/example/exfspline1.rb +40 -40
  13. data/ext/pspline/example/exfspline2.rb +68 -68
  14. data/ext/pspline/example/exfspline3.rb +64 -64
  15. data/ext/pspline/example/exmspline.rb +68 -68
  16. data/ext/pspline/example/expspline.rb +29 -29
  17. data/ext/pspline/example/expspline1.rb +29 -29
  18. data/ext/pspline/example/expspline2.rb +47 -47
  19. data/ext/pspline/example/exqspline.rb +31 -31
  20. data/ext/pspline/example/exqspline1.rb +31 -31
  21. data/ext/pspline/example/exqspline2.rb +50 -50
  22. data/ext/pspline/example/exqspline3.rb +51 -51
  23. data/ext/pspline/example/exqspline4.rb +35 -35
  24. data/ext/pspline/example/exrspline.rb +34 -34
  25. data/ext/pspline/example/exrspline1.rb +34 -34
  26. data/ext/pspline/example/exrspline2.rb +44 -44
  27. data/ext/pspline/example/exsspline.rb +35 -35
  28. data/ext/pspline/example/exsspline1.rb +35 -35
  29. data/ext/pspline/example/extspline.rb +54 -54
  30. data/ext/pspline/extconf.rb +7 -7
  31. data/ext/pspline/fft.cpp +27 -552
  32. data/ext/pspline/include/basis/basis.h +145 -137
  33. data/ext/pspline/include/basis/fft.h +188 -152
  34. data/ext/pspline/include/basis/fft_complex.h +215 -0
  35. data/ext/pspline/include/basis/fft_real.h +625 -0
  36. data/ext/pspline/include/basis/gabs.h +35 -0
  37. data/ext/pspline/include/basis/marray_class_ext.h +568 -0
  38. data/ext/pspline/include/basis/marray_ext.h +100 -0
  39. data/ext/pspline/include/basis/matrix_luc_ext.h +300 -0
  40. data/ext/pspline/include/basis/matrix_lud_ext.h +298 -0
  41. data/ext/pspline/include/basis/poly.h +454 -0
  42. data/ext/pspline/include/basis/poly_array.h +1030 -1568
  43. data/ext/pspline/include/basis/pspline.h +806 -642
  44. data/ext/pspline/include/basis/real.h +526 -0
  45. data/ext/pspline/include/basis/real_inline.h +442 -0
  46. data/ext/pspline/include/basis/spline.h +83 -0
  47. data/ext/pspline/include/basis/uspline.h +251 -210
  48. data/ext/pspline/include/basis/util.h +122 -656
  49. data/ext/pspline/include/bspline.h +71 -377
  50. data/ext/pspline/include/bspline_Config.h +8 -2
  51. data/ext/pspline/include/real_config.h +3 -0
  52. data/ext/pspline/pspline.cpp +1236 -1038
  53. data/ext/pspline/real.cpp +1607 -0
  54. data/ext/pspline/real_const.cpp +585 -0
  55. data/lib/pspline.rb +71 -71
  56. data/lib/pspline/version.rb +1 -1
  57. data/pspline.gemspec +25 -25
  58. metadata +17 -5
  59. data/ext/pspline/plotsub.cpp +0 -139
  60. data/ext/pspline/util.cpp +0 -483
@@ -0,0 +1,100 @@
1
+ #ifndef _MARRAY_H_
2
+ #define _MARRAY_H_
3
+ /*******************************************************************************
4
+ marray utility.
5
+ *******************************************************************************/
6
+ template <class T>
7
+ T ** marray_alloc(char *mm, size_t nr, size_t sr, size_t sc)
8
+ {
9
+ T **m = (T **)mm;
10
+ m[0] = (T *)(mm = mm + sr);
11
+ for (size_t i = 1; i < nr; ++i) m[i] = (T *)(mm = mm + sc);
12
+ return m;
13
+ }
14
+
15
+ template <typename T>
16
+ T ** create_marray(size_t nr, size_t nc, size_t a = 0)
17
+ {
18
+ size_t sr = nr * sizeof(T *);
19
+ size_t sc = nc * sizeof(T) * (a + 1);
20
+ char * mm = (char *)malloc(sr + nr * sc);
21
+ if (mm == NULL) throw "allocate error, create_marray";
22
+ return marray_alloc<T>(mm, nr, sr, sc);
23
+ }
24
+
25
+ template <typename T>
26
+ T ** marray_view_alloc(T **m, int nr, int nc, T *v, int a = 0)
27
+ {
28
+ m[0] = v;
29
+ for (int i = 1; i < nr; i++) m[i] = m[i-1] + nc * (a + 1);
30
+ m[nr] = v;
31
+ return m;
32
+ }
33
+
34
+ template <typename T>
35
+ T ** create_marray_view(T *v, int nr, int nc, int a = 0)
36
+ {
37
+ T **m = (T**)malloc((nr+1) * sizeof(T*));
38
+ return marray_view_alloc(m, nr, nc, v, a);
39
+ }
40
+
41
+ template <typename T>
42
+ T ** carray_alloc(char *mm, size_t sr, size_t n, size_t *s, size_t a = 0)
43
+ {
44
+ T **m = (T**)mm;
45
+ m[0] = (T*)(mm = mm + sr);
46
+ for (size_t i = 1; i < n; ++i) m[i] = (T*)(mm = mm + s[i-1] * (a + 1) * sizeof(T));
47
+ return m;
48
+ }
49
+
50
+ template <typename T>
51
+ T ** create_carray(size_t n, size_t *s, size_t a = 0)
52
+ {
53
+ size_t c = 0;
54
+ for (size_t i = 0; i < n; ++i) c += s[i];
55
+ size_t sr = n * sizeof(T*);
56
+ size_t sc = c * sizeof(T) * (a + 1);
57
+ char * mm = (char *)malloc(sr + sc);
58
+ if (mm == NULL) throw "allocate error, create_carray";
59
+ return carray_alloc<T>(mm, sr, n, s, a);
60
+ }
61
+
62
+ template <typename T>
63
+ T ** carray_view_alloc(T **m, size_t n, size_t *s, T *d, size_t a = 0)
64
+ {
65
+ m[0] = d;
66
+ for (size_t i = 1; i < n; ++i) m[i] = m[i-1] + s[i-1] * (a+1);
67
+ m[n] = d;
68
+ return m;
69
+ }
70
+
71
+ template <typename T>
72
+ T ** create_carray_view(T *d, size_t n, size_t *s, size_t a = 0)
73
+ {
74
+ T **m = (T**)malloc((n+1) * sizeof(T*));
75
+ m[n] = NULL;
76
+ return d == NULL ? m : carray_view_alloc<T>(m, n, s, d, a);
77
+ }
78
+
79
+ /*
80
+ T行列 [[T00,...],...] : T**
81
+ */
82
+ #define T_MALLOC(T,i,j) create_marray<T>((i),(j))
83
+ #define T_MALLOC_VIEW(T,v,i,j) create_marray_view<T>((v),(i),(j))
84
+ /*
85
+ 実数行列 [[X00,...,X0(c-1)],...,[X(r-1)0,...,X(r-1)(c-1)]] : marray
86
+ */
87
+ #define MALLOC(r,c) create_marray<double>((r),(c))
88
+ #define MALLOC_VIEW(v,r,c) create_marray_view<double>((v),(r),(c))
89
+ /*
90
+ TC行列 [[T00,...],...] : T**
91
+ */
92
+ #define T_CALLOC(T,i,j) create_carray<T>((i),(j))
93
+ #define T_CALLOC_VIEW(T,v,i,j) create_carray<T>((v),(i),(j))
94
+ /*
95
+ C行列 [[X00,...,X0(c-1)],...,[X(r-1)0,...,X(r-1)(c-1)]] : marray
96
+ */
97
+ #define CALLOC(r,c) create_carray<double>((r),(c))
98
+ #define CALLOC_VIEW(v,r,c) create_carray_view<double>((v),(r),(c))
99
+
100
+ #endif
@@ -0,0 +1,300 @@
1
+ #ifndef _MATRIX_LU_H_
2
+ #define _MATRIX_LU_H_
3
+ #define _MATRIX_LU_C_
4
+ #include "basis/gabs.h"
5
+ #include "basis/marray_ext.h"
6
+ /*******************************************************************************
7
+ Crout : LU分解 クラウト法
8
+
9
+ U0j = a0j; j = 0,...,n-1
10
+ Uij = aij - ΣLik*Ukj; i <= j, k = 0,...,i-1
11
+ Lij =(aij - ΣLik*Ukj)/Ujj; i > j, k = 0,...,j-1
12
+ *******************************************************************************/
13
+ template<class T> void luc_decomp(T * a, size_t n, size_t * p, int & s)
14
+ {
15
+ size_t i, j, k, L;
16
+ T *ai, *ak;
17
+
18
+ s = 1;
19
+ for (ak = a, k = 0; k < n-1; ++k, ak += n) {
20
+ T akk = ak[k]; L = k;
21
+ // ピボット選択
22
+ for (j = k+1; j < n; ++j)
23
+ if (gabs(akk) < gabs(ak[j])) { L = j; akk = ak[j]; }
24
+ // ピボット列交換
25
+ if (L != k) {
26
+ for (ai = a, i = 0; i < n; ++i, ai += n) {
27
+ T w = ai[k]; ai[k] = ai[L]; ai[L] = w; // a[*,k] <=> a[*,L]
28
+ } s *= -1;
29
+ } p[k] = L;
30
+ // 前進消去
31
+ for (ai = ak + n, i = k+1; i < n; ++i, ai += n) {
32
+ T aik = ai[k] / akk;
33
+ for (j = k+1; j < n; ++j) ai[j] -= aik * ak[j];
34
+ ai[k] = aik;
35
+ }
36
+ }
37
+ }
38
+
39
+ template<class T, class S> void luc_subst(T * a, size_t n, size_t * p, S * b)
40
+ {
41
+ size_t i, j, js = n, k; S sum;
42
+ T *ai;
43
+ // 前進代入
44
+ for (ai = a, i = 0; i < n; ++i, ai += n) {
45
+ sum = b[i];
46
+ if (js < n)
47
+ for (j = js; j < i; ++j) sum -= ai[j] * b[j];
48
+ else if (sum != 0) js = i;
49
+ b[i] = sum;
50
+ }
51
+ // 後退代入
52
+ for (k = n; k > 0; --k) {
53
+ i = k - 1; ai -= n;
54
+ sum = b[i];
55
+ for (j = n-1; j > i; --j) sum -= ai[j] * b[j];
56
+ b[i] = sum / ai[i];
57
+ }
58
+ // 解の保存
59
+ for (k = n-1; k > 0; --k) {
60
+ i = k - 1; j = p[i];
61
+ if (i != j) { sum = b[j]; b[j] = b[i]; b[i] = sum; }
62
+ }
63
+ }
64
+
65
+ template<class T, class S> void luc_subst(T * a, size_t n, size_t * p, S * x, int K)
66
+ {
67
+ S sum, *su, *sv, **b = T_MALLOC_VIEW(S, x, n, K);
68
+ size_t i, j, k, l, js = n;
69
+ T* ai;
70
+ // 前進代入
71
+ for (ai = a, i = 0; i < n; ++i, ai += n) {
72
+ su = b[i];
73
+ for (l = 0; l < size_t(K); ++l) {
74
+ sum = su[l];
75
+ if (js < n)
76
+ for (j = js; j < i; ++j) sum -= ai[j] * b[j][l];
77
+ else if (sum != 0) js = i;
78
+ su[l] = sum;
79
+ }
80
+ }
81
+ // 後退代入
82
+ for (k = n; k > 0; --k) {
83
+ i = k - 1; ai -= n;
84
+ su = b[i];
85
+ for (l = 0; l < size_t(K); ++l) {
86
+ sum = su[l];
87
+ for (j = n-1; j > i; --j) sum -= ai[j] * b[j][l];
88
+ su[l] = sum / ai[i];
89
+ }
90
+ }
91
+ // 解の保存
92
+ for (k = n-1; k > 0; --k) {
93
+ i = k - 1; j = p[i];
94
+ su = b[i]; sv = b[j];
95
+ if (i != j) for (l = 0; l < size_t(K); ++l) {
96
+ sum = sv[l]; sv[l] = su[l]; su[l] = sum;
97
+ }
98
+ }
99
+ free((void*)b);
100
+ }
101
+
102
+ template <class T, class P> void luc_decomp(P& A, size_t * p, int& s)
103
+ // T = double, P = marray<double> || marray_view<double>
104
+ {
105
+ size_t N = A.rows(), a = A.atom();
106
+ size_t i, j, k, l, L;
107
+ real<T> big, tmp; varray<T> v(N, a);
108
+
109
+ s = 1;
110
+ for (i = 0; i < N; ++i) {
111
+ big = 0.0;
112
+ for (j = 0; j < N; ++j) {
113
+ real<T> aij(a, A(i,j));
114
+ if ((tmp = gabs(aij)) > big) big = tmp;
115
+ }
116
+ if (big == 0.0) throw "Singular matrix in routine luc_decomp";
117
+ tmp = 1.0 / big;
118
+ qd_ass(a, v(i), (double*)tmp);
119
+ }
120
+ for (j = 0; j < N; j++) {
121
+ L = j; big = 0.0;
122
+ for (i = 1; i < N; i++) {
123
+ l = j < i ? j : i;
124
+ T *aij = A(i,j);
125
+ for (k = 0; k < l; k++) {
126
+ T u[a+1], *aik = A(i,k), *akj = A(k,j);
127
+ // aij -= A[i][k] * A[k][j];
128
+ qd_mul(a, u, aik, akj);
129
+ qd_sub(a, aij, aij, u);
130
+ }
131
+ // ピボット選択
132
+ if (i >= j) {
133
+ real<T> Aij(a, aij), vi(v[i]);
134
+ if ((tmp = gabs(Aij) * vi) > big) { big = tmp; L = i; }
135
+ }
136
+ } p[j] = L;
137
+ // ピボット行交換
138
+ if (L != j) {
139
+ A.row_swap(j, L); // A[j,*] <=> A[L,*]
140
+ qd_ass(a, v(L), v(j));
141
+ s *= -1;
142
+ }
143
+ real<T> ajj(a, A(j,j));
144
+ if (ajj == 0.0) throw "Divide by zero in luc_decomp";
145
+ if (j < N-1) for (i = j+1; i < N; i++) {
146
+ T *u = ajj, *aij = A(i,j);
147
+ // A[i][j] /= ajj;
148
+ qd_div(a, aij, aij, u);
149
+ }
150
+ }
151
+ }
152
+
153
+ template <class T, class P> void luc_subst(const P& A, size_t * p, varray<T>& B)
154
+ // T = double, P = marray<double> || marray_view<double>
155
+ {
156
+ size_t N = A.rows(), a = A.atom();
157
+ size_t i, j, k, js = N; T u[a+1];
158
+ // 前進代入
159
+ for (i = 0; i < N; ++i) {
160
+ k = p[i];
161
+ real<T> sum(a, B(k));
162
+ if (i != k) qd_ass(a, B(k), B(i));
163
+ T *s = sum;
164
+ if (js < N)
165
+ for (j = js; j < i; ++j) {
166
+ // sum -= A[i][j] * B[j];
167
+ qd_mul(a, u, A(i,j), B(j));
168
+ qd_sub(a, s, s, u);
169
+ }
170
+ else if (sum != 0) js = i;
171
+ qd_ass(a, B(i), s);
172
+ }
173
+ // 後退代入
174
+ for (k = N; k > 0; --k) {
175
+ i = k - 1;
176
+ for (j = N-1; j > i; --j) {
177
+ // B[i] -= A[i][j] * B[j];
178
+ qd_mul(a, u, A(i,j), B(j));
179
+ qd_sub(a, B(i), B(i), u);
180
+ }
181
+ // B[i] /= A[i][i];
182
+ qd_div(a, B(i), B(i), A(i,i));
183
+ }
184
+ }
185
+
186
+ template <class T, class P> void luc_subst(const P& a, size_t * p, marray_view<T>& x)
187
+ // T = double, P = marray<double> || marray_view<double>
188
+ {
189
+ size_t K = x.cols();
190
+ for (size_t i = 0; i < K; ++i) { varray<T> v = x.col(i); luc_subst<T>(a, p, v); }
191
+ }
192
+
193
+ template <class T, class P, class S> real<T> luc_solve(P& A, S& B)
194
+ // T = double, P = marray<double> || marray_view<double>, S = varray_view<double> || marray_view<double>
195
+ {
196
+ size_t N = A.rows(), a = A.atom();
197
+ size_t *p = new size_t[N];
198
+ int s; luc_decomp<T>(A, p, s); luc_subst<T>(A, p, B);
199
+ real<T> det(a); det = s; T *u = det;
200
+ for (size_t k = 0; k < N; ++k) qd_mul(a, u, u, A(k,k)); // det *= A[k][k];
201
+ delete[] p;
202
+ return det;
203
+ }
204
+
205
+ #define lu_decomp luc_decomp
206
+ #define lu_subst luc_subst
207
+ #define lu_solve luc_solve
208
+
209
+ template <typename T, typename P> void invert(P& A)
210
+ // T = double, P = marray<double> || marray_view<double>;
211
+ {
212
+ size_t N = A.rows(), a = A.atom();
213
+ size_t i, j, k, *p = new size_t[N];
214
+ varray<T> B(N, a);
215
+
216
+ for (k = 0; k < N; ++k)
217
+ {
218
+ real<T> Akk(a, A(k,k)); j = k;
219
+ for (i = k+1; i < N; ++i) {
220
+ real<T> Aik(a, A(i,k));
221
+ if (gabs(Aik) > gabs(Akk)) { j = i; Akk = real<T>(a, A(i,k)); }
222
+ } if (j != k) A.row_swap(k,j);
223
+ p[k] = j;
224
+ for (i = 0; i < N; ++i) {
225
+ // B[i] = A[i][k];
226
+ T *s = B(i), *t = A(i,k); for (j = 0; j <= a; ++j) s[j] = t[j];
227
+ // A[i][k] = (i == k) ? 1.0 : 0.0;
228
+ s = A(i,k); for (j = 0; j <= a; ++j) s[j] = (j == 0) && (i == k) ? 1.0 : 0.0;
229
+ // A[k][i] /= Akk;
230
+ t = A(k,i); qd_div(a, t, t, s = Akk);
231
+ }
232
+ for (i = 0; i < N; ++i)
233
+ if (i != k)
234
+ for (j = 0; j < N; ++j) {
235
+ T u[a + 1], *v = A(i,j), *s = B(i), *t = A(k,j);
236
+ // A[i][j] -= B[i] * A[k][j];
237
+ qd_mul(a, u, s, t);
238
+ qd_sub(a, v, v, u);
239
+ }
240
+ }
241
+ for (k = N; k > 0; --k)
242
+ {
243
+ j = k - 1;
244
+ if (p[j] != j) for (i = 0; i < N; ++i) {
245
+ T *u = A(i,j), *v = A(i,p[j]);
246
+ for (size_t l = 0; l <= a; ++l) {
247
+ T w = u[l]; u[l] = v[l]; v[l] = w;
248
+ }
249
+ }
250
+ }
251
+ delete[] p;
252
+ }
253
+
254
+ template <typename T>
255
+ void invert(T *A, size_t N)
256
+ {
257
+ size_t i, j, k, *P = new size_t[N]; T *B = new T[N];
258
+
259
+ T *Ak = A;
260
+ for (k = 0; k < N; ++k, Ak += N)
261
+ {
262
+ T Akk = Ak[k]; j = k;
263
+ T *Ai = Ak, *Aj = Ak;
264
+ for (i = k+1; i < N; ++i) {
265
+ Ai += N;
266
+ if (gabs(Ai[k]) > gabs(Akk)) {
267
+ j = i; Akk = Ai[k]; Aj = Ai;
268
+ }
269
+ } if (j != k) for (i = 0; i < N; ++i) {
270
+ T W = Ak[i]; Ak[i] = Aj[i]; Aj[i] = W;
271
+ }
272
+ P[k] = j;
273
+ Ai = A;
274
+ for (i = 0; i < N; ++i, Ai += N) {
275
+ B[i] = Ai[k]; Ai[k] = (i == k) ? 1 : 0;
276
+ Ak[i] /= Akk;
277
+ }
278
+ for (j = N; j > 0; --j) {
279
+ i = j - 1;
280
+ Ai -= N;
281
+ if (i != k)
282
+ for (size_t l = 0; l < N; ++l)
283
+ Ai[l] -= B[i] * Ak[l];
284
+ }
285
+ }
286
+ delete[] B;
287
+ for (k = N; k > 0; --k)
288
+ {
289
+ j = k - 1;
290
+ if (P[j] != j) {
291
+ T *Ai = A;
292
+ for (i = 0; i < N; ++i, Ai += N) {
293
+ T W = Ai[j]; Ai[j] = Ai[P[j]]; Ai[P[j]] = W;
294
+ }
295
+ }
296
+ }
297
+ delete[] P;
298
+ }
299
+
300
+ #endif
@@ -0,0 +1,298 @@
1
+ #ifndef _MATRIX_LU_H_
2
+ #define _MATRIX_LU_H_
3
+ #define _MATRIX_LU_D_
4
+ #include "basis/gabs.h"
5
+ #include "basis/marray_ext.h"
6
+ /*******************************************************************************
7
+ Matrix operation
8
+
9
+ Doolittle : LU分解 ドゥーリトル法
10
+
11
+ Li0 = ai0; i = 0,...,n-1
12
+ Lij = aij - ΣLik*Ukj; i >= j, k = 0,...,j-1
13
+ Uij =(aij - ΣLik*Ukj)/Lii; i < j, k = 0,...,i-1
14
+ *******************************************************************************/
15
+ template <class T> void lud_decomp(T * a, size_t n, size_t * p, int & s)
16
+ {
17
+ size_t i, j, k, l, L;
18
+ T *ai, *ak;
19
+
20
+ s = 1;
21
+ for (ai = a, i = 0; i < n; i++, ai += n) {
22
+ T aii = ai[i]; L = i;
23
+ for (j = 1; j < n; j++) {
24
+ T aij = ai[j];
25
+ l = i < j ? i : j;
26
+ for (ak = a, k = 0; k < l; k++, ak += n) aij -= ai[k] * ak[j];
27
+ ai[j] = aij;
28
+ // ピボット選択
29
+ if ((j == i) || ((j > i) && (gabs(aii) < gabs(aij)))) { L = j; aii = aij; }
30
+ }
31
+ // ピボット列交換
32
+ if (L != i) {
33
+ for (ak = a, k = 0; k < n; ++k, ak += n) {
34
+ T w = ak[i]; ak[i] = ak[L]; ak[L] = w; // a[*,i] <=> a[*,L]
35
+ } s *= -1;
36
+ } p[i] = L;
37
+ if (i < n-1) for (j = i+1; j < n; j++) ai[j] /= aii;
38
+ }
39
+ }
40
+
41
+ template <class T, class S> void lud_subst(T * a, size_t n, size_t * p, S * b)
42
+ {
43
+ size_t i, j, k, js = n; S sum;
44
+ T *ai;
45
+ // 前進代入
46
+ for (ai = a, i = 0; i < n; ++i, ai += n) {
47
+ sum = b[i];
48
+ if (js < n)
49
+ for (j = js; j < i; ++j) sum -= ai[j] * b[j];
50
+ else if (sum != 0) js = i;
51
+ b[i] = sum / ai[i];
52
+ } ai -= n;
53
+ // 後退代入
54
+ for (k = n-1; k > 0; --k) {
55
+ i = k - 1; ai -= n;
56
+ sum = b[i];
57
+ for (j = n-1; j > i; --j) sum -= ai[j] * b[j];
58
+ b[i] = sum;
59
+ }
60
+ // 解の保存
61
+ for (k = n-1; k > 0; --k) {
62
+ i = k - 1; j = p[i];
63
+ if (i != j) { sum = b[j]; b[j] = b[i]; b[i] = sum; }
64
+ }
65
+ }
66
+
67
+ template <class T, class S> void lud_subst(T * a, size_t n, size_t * p, S * x, int K)
68
+ {
69
+ S sum, *su, *sv, **b = T_MALLOC_VIEW(S, x, n, K);
70
+ size_t i, j, k, l, js = n;
71
+ T *ai;
72
+ // 前進代入
73
+ for (ai = a, i = 0; i < n; ++i, ai += n) {
74
+ su = b[i];
75
+ for (l = 0; l < size_t(K); ++l) {
76
+ sum = su[l];
77
+ if (js < n)
78
+ for (j = js; j < i; ++j) sum -= ai[j] * b[j][l];
79
+ else if (sum != 0) js = i;
80
+ su[l] = sum / ai[i];
81
+ }
82
+ } ai -= n;
83
+ // 後退代入
84
+ for (k = n-1; k > 0; --k) {
85
+ i = k - 1; ai -= n;
86
+ su = b[i];
87
+ for (l = 0; l < size_t(K); ++l) {
88
+ sum = su[l];
89
+ for (j = n-1; j > i; --j) sum -= ai[j] * b[j][l];
90
+ su[l] = sum;
91
+ }
92
+ }
93
+ // 解の保存
94
+ for (k = n-1; k > 0; --k) {
95
+ i = k - 1; j = p[i];
96
+ su = b[i]; sv = b[j];
97
+ if (i != j) for (l = 0; l < size_t(K); ++l) {
98
+ sum = sv[l]; sv[l] = su[l]; su[l] = sum;
99
+ }
100
+ }
101
+ free((void*)b);
102
+ }
103
+
104
+ template<class T, class P> void lud_decomp(P& A, size_t * p, int &s)
105
+ // T = double, P = marray<double> || marray_view<double>
106
+ {
107
+ size_t N = A.rows(), a = A.atom();
108
+ size_t i, j, k, L;
109
+ real<T> big, tmp; varray<T> v(N, a);
110
+
111
+ s = 1;
112
+ for (i = 0; i < N; ++i) {
113
+ big = 0.0;
114
+ for (j = 0; j < N; ++j) {
115
+ real<T> aij(a, A(i,j));
116
+ if ((tmp = gabs(aij)) > big) big = tmp;
117
+ }
118
+ if (big == 0.0) throw "Singular matrix in routine lud_decomp";
119
+ tmp = 1.0 / big;
120
+ qd_ass(a, v(i), (double*)tmp);
121
+ }
122
+ for (k = 0; k < N-1; ++k) {
123
+ // 陰的ピボット選択
124
+ L = k; big = 0.0;
125
+ for (i = k+1; i < N; ++i) {
126
+ real<T> aik(a, A(i,k)), vi(v[i]);
127
+ if ((tmp = gabs(aik * vi)) > big ) { big = tmp; L = i; }
128
+ }
129
+ // ピボット行交換
130
+ if (L != k) {
131
+ A.row_swap(k, L); // A[k,*] <=> A[L,*]
132
+ qd_ass(a, v(L), v(k));
133
+ s *= -1;
134
+ } p[k] = L;
135
+ // 前進消去
136
+ real<T> akk(a, A(k,k));
137
+ if (akk == 0.0) throw "Divide by zero in lud_decomp";
138
+ for (j = k+1; j < N; ++j) {
139
+ T w[a+1], *u = akk;
140
+ // A[k][j] = A[k][j] / akk;
141
+ qd_div(a, A(k,j), A(k,j), u);
142
+ for (i = k+1; i < N; ++i) {
143
+ // A[i][j] -= A[i][k] * A[k][j];
144
+ qd_mul(a, w, A(i,k), A(k,j));
145
+ qd_sub(a, A(i,j), A(i,j), w);
146
+ }
147
+ }
148
+ }
149
+ }
150
+
151
+ template<class T, class P> void lud_subst(const P& A, size_t * p, varray<T>& B)
152
+ // T = double, P = marray<double> || marray_view<double>
153
+ {
154
+ size_t N = A.rows(), a = A.atom();
155
+ size_t i, j, k, js = N; T w[a+1];
156
+ // 前進代入
157
+ for (i = 0; i < N; ++i) {
158
+ k = (i < N-1) ? p[i] : i;
159
+ real<T> sum(a, B(k));
160
+ if (i != k) qd_ass(a, B(k), B(i));
161
+ T *s = sum;
162
+ if (js < N) {
163
+ for (j = js; j < i; ++j) {
164
+ // sum -= A[i][j] * B[j];
165
+ qd_mul(a, w, A(i,j), B(j));
166
+ qd_sub(a, s, s, w);
167
+ }
168
+ }
169
+ else if (sum != 0) js = i;
170
+ // B[i] = sum / A[i][i];
171
+ qd_div(a, B(i), s, A(i,i));
172
+ }
173
+ // 後退代入
174
+ for (k = N - 1; k > 0; --k) {
175
+ i = k - 1;
176
+ for (j = N - 1; j > i; --j) {
177
+ // B[i] -= A[i][j] * B[j];
178
+ qd_mul(a, w, A(i,j), B(j));
179
+ qd_sub(a, B(i), B(i), w);
180
+ }
181
+ }
182
+ }
183
+
184
+ template <class T, class P> void lud_subst(const P& a, size_t * p, marray_view<T>& x)
185
+ // T = double, P = marray<double> || marray_view<double>
186
+ {
187
+ size_t K = x.cols();
188
+ for (size_t i = 0; i < K; ++i) { varray<T> v = x.col(i); lud_subst<T>(a, p, v); }
189
+ }
190
+
191
+ template <class T, class P, class S> real<T> lud_solve(P& A, S& B)
192
+ // T = double, P = marray<double> || marray_view<double>, S = varray_view<double> || marray_view<double>
193
+ {
194
+ size_t N = A.rows(), a = A.atom();
195
+ size_t *p = new size_t[N];
196
+ int s; lud_decomp<T>(A, p, s); lud_subst<T>(A, p, B);
197
+ real<T> det(a); det = s; T *u = det;
198
+ for (size_t k = 0; k < N; ++k) qd_mul(a, u, u, A(k,k)); // det *= A[k][k];
199
+ delete[] p;
200
+ return det;
201
+ }
202
+
203
+ #define lu_decomp lud_decomp
204
+ #define lu_subst lud_subst
205
+ #define lu_solve lud_solve
206
+
207
+ template <typename T, typename P> void invert(P& A)
208
+ // T = double, P = marray<double> || marray_view<double>;
209
+ {
210
+ size_t N = A.rows(), a = A.atom();
211
+ size_t i, j, k, *p = new size_t[N];
212
+ varray<T> B(N, a);
213
+
214
+ for (k = 0; k < N; ++k)
215
+ {
216
+ real<T> Akk(a, A(k,k)); j = k;
217
+ for (i = k+1; i < N; ++i) {
218
+ real<T> Aik(a, A(i,k));
219
+ if (gabs(Aik) > gabs(Akk)) { j = i; Akk = real<T>(a, A(i,k)); }
220
+ } if (j != k) A.row_swap(k,j);
221
+ p[k] = j;
222
+ for (i = 0; i < N; ++i) {
223
+ // B[i] = A[i][k];
224
+ T *s = B(i), *t = A(i,k); for (j = 0; j <= a; ++j) s[j] = t[j];
225
+ // A[i][k] = (i == k) ? 1.0 : 0.0;
226
+ s = A(i,k); for (j = 0; j <= a; ++j) s[j] = (j == 0) && (i == k) ? 1.0 : 0.0;
227
+ // A[k][i] /= Akk;
228
+ t = A(k,i); qd_div(a, t, t, s = Akk);
229
+ }
230
+ for (i = 0; i < N; ++i)
231
+ if (i != k)
232
+ for (j = 0; j < N; ++j) {
233
+ T u[a + 1], *v = A(i,j), *s = B(i), *t = A(k,j);
234
+ // A[i][j] -= B[i] * A[k][j];
235
+ qd_mul(a, u, s, t);
236
+ qd_sub(a, v, v, u);
237
+ }
238
+ }
239
+ for (k = N; k > 0; --k)
240
+ {
241
+ j = k - 1;
242
+ if (p[j] != j) for (i = 0; i < N; ++i) {
243
+ T *u = A(i,j), *v = A(i,p[j]);
244
+ for (size_t l = 0; l <= a; ++l) {
245
+ T w = u[l]; u[l] = v[l]; v[l] = w;
246
+ }
247
+ }
248
+ }
249
+ delete[] p;
250
+ }
251
+
252
+ template <typename T>
253
+ void invert(T *A, size_t N)
254
+ {
255
+ size_t i, j, k, *P = new size_t[N]; T *B = new T[N];
256
+
257
+ T *Ak = A;
258
+ for (k = 0; k < N; ++k, Ak += N)
259
+ {
260
+ T Akk = Ak[k]; j = k;
261
+ T *Ai = Ak, *Aj = Ak;
262
+ for (i = k+1; i < N; ++i) {
263
+ Ai += N;
264
+ if (gabs(Ai[k]) > gabs(Akk)) {
265
+ j = i; Akk = Ai[k]; Aj = Ai;
266
+ }
267
+ } if (j != k) for (i = 0; i < N; ++i) {
268
+ T W = Ak[i]; Ak[i] = Aj[i]; Aj[i] = W;
269
+ }
270
+ P[k] = j;
271
+ Ai = A;
272
+ for (i = 0; i < N; ++i, Ai += N) {
273
+ B[i] = Ai[k]; Ai[k] = (i == k) ? 1 : 0;
274
+ Ak[i] /= Akk;
275
+ }
276
+ for (j = N; j > 0; --j) {
277
+ i = j - 1;
278
+ Ai -= N;
279
+ if (i != k)
280
+ for (size_t l = 0; l < N; ++l)
281
+ Ai[l] -= B[i] * Ak[l];
282
+ }
283
+ }
284
+ delete[] B;
285
+ for (k = N; k > 0; --k)
286
+ {
287
+ j = k - 1;
288
+ if (P[j] != j) {
289
+ T *Ai = A;
290
+ for (i = 0; i < N; ++i, Ai += N) {
291
+ T W = Ai[j]; Ai[j] = Ai[P[j]]; Ai[P[j]] = W;
292
+ }
293
+ }
294
+ }
295
+ delete[] P;
296
+ }
297
+
298
+ #endif