kmat 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitattributes +3 -0
  3. data/.gitignore +15 -0
  4. data/CHANGELOG.md +15 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.md +675 -0
  7. data/README.md +224 -0
  8. data/Rakefile +26 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/ext/kmat/arith/binary.c +1121 -0
  12. data/ext/kmat/arith/logical.c +332 -0
  13. data/ext/kmat/arith/math.c +34 -0
  14. data/ext/kmat/arith/statistics.c +173 -0
  15. data/ext/kmat/arith/unary.c +165 -0
  16. data/ext/kmat/auto_collect.rb +118 -0
  17. data/ext/kmat/elementwise_function.rb +149 -0
  18. data/ext/kmat/extconf.rb +75 -0
  19. data/ext/kmat/id.txt +80 -0
  20. data/ext/kmat/id_sym.rb +40 -0
  21. data/ext/kmat/km_util.h +97 -0
  22. data/ext/kmat/kmat.h +96 -0
  23. data/ext/kmat/lapack_headers/blas.h +354 -0
  24. data/ext/kmat/lapack_headers/lapacke.h +19455 -0
  25. data/ext/kmat/lapack_headers/lapacke_config.h +119 -0
  26. data/ext/kmat/lapack_headers/lapacke_mangling.h +17 -0
  27. data/ext/kmat/lapack_headers/lapacke_utils.h +579 -0
  28. data/ext/kmat/linalg/dla.c +1629 -0
  29. data/ext/kmat/linalg/linalg.c +267 -0
  30. data/ext/kmat/linalg/norm.c +727 -0
  31. data/ext/kmat/linalg/vla.c +102 -0
  32. data/ext/kmat/linalg/working.c +240 -0
  33. data/ext/kmat/main.c +95 -0
  34. data/ext/kmat/smat/accessor.c +719 -0
  35. data/ext/kmat/smat/array.c +108 -0
  36. data/ext/kmat/smat/boxmuller.c +72 -0
  37. data/ext/kmat/smat/constructer.c +302 -0
  38. data/ext/kmat/smat/convert.c +375 -0
  39. data/ext/kmat/smat/elem.c +171 -0
  40. data/ext/kmat/smat/fund.c +702 -0
  41. data/ext/kmat/smat/share.c +427 -0
  42. data/ext/kmat/smat/smat.c +530 -0
  43. data/ext/kmat/smat/sort.c +1156 -0
  44. data/ext/kmat/sym.txt +34 -0
  45. data/kmat.gemspec +46 -0
  46. data/lib/kmat.rb +20 -0
  47. data/lib/kmat/accessor.rb +164 -0
  48. data/lib/kmat/arith.rb +189 -0
  49. data/lib/kmat/linalg.rb +279 -0
  50. data/lib/kmat/logical.rb +150 -0
  51. data/lib/kmat/misc.rb +122 -0
  52. data/lib/kmat/random.rb +106 -0
  53. data/lib/kmat/statistics.rb +98 -0
  54. data/lib/kmat/version.rb +3 -0
  55. metadata +156 -0
@@ -0,0 +1,1629 @@
1
+ #include "../kmat.h"
2
+
3
+ static inline void
4
+ km_check_info_opt(int info, const char *funcname)
5
+ {
6
+ km_check_info(info, rb_eRuntimeError, "error occured while computing optimal lwork", funcname);
7
+ }
8
+
9
// move elements of the first column of `body' to the diagonal position
// set 0 to the original positions
// `body' is a column-major (m, n) matrix with leading dimension `ld'.
// After the call, the value that was at body[i] (i = 1..min(m,n)-1, first
// column) sits at body[i*(ld+1)] (the diagonal), and the vacated first-column
// slots are zeroed. body[0] is already a diagonal entry and is left alone.
static void
km_vec2diag(int m, int n, double *body, int ld)
{
	int lm1 = MIN(m, n)-1, mp1=ld+1, i=1;
	// dcopy with input stride 1 (&i, i==1 here) and output stride ld+1
	// scatters the column entries onto the diagonal slots in one call
	dcopy_(&lm1, body+1, &i, body+mp1, &mp1);
	// i is still 1 after being used as the stride; zero the old positions
	for ( ; i<=lm1; i++ ) { body[i] = 0.0; }
}
18
+
19
// restore the modification caused by `dgebal'
// it means, smat->body = D * AA / D
// on entry, AA is `smat' and D can be constructed from scale, ilo and ihi
// (the balancing data produced by dgebal/dgeevx).
struct km_unbal_arg {
	SMAT *smat;      // matrix to un-balance in place
	double *scale;   // balancing scale factors from dgebal
	int ilo, ihi;    // balanced index range from dgebal
	int n;           // order of the matrix (smat->m)
	LAWORK body;     // LAPACK-ready view/copy of smat
	double *at;      // n*n scratch buffer holding the transpose
	SMAT *sat;       // temporary SMAT header wrapping the work buffers
};
// body half of the rb_ensure pair: may raise, cleanup is in km_unbal_ensure
static VALUE
km_unbal_body(VALUE data)
{
	struct km_unbal_arg *a = (struct km_unbal_arg *)data;

	int info;
	char job[]="B", side[]="R";
	KALLOC(a->at, (a->n)*(a->n));
	KALLOCn(a->body, a->smat);
	dgebak_(job, side, &(a->n), &(a->ilo), &(a->ihi), a->scale, &(a->n), a->body.d, &(a->body.ld), &info); // multiply D from the left
	km_check_info(info, rb_eRuntimeError, "unexpected info value", "dgebak");
	// wrap the work buffer in a transposed SMAT header so km_copy2work
	// performs the transpose while copying
	a->sat = km_smat_alloc_with(a->n, a->n, VT_DOUBLE, a->body.body);
	a->sat->trans = true;
	km_copy2work(a->at, a->n, a->sat); // take transpose
	side[0] = 'L';
	dgebak_(job, side, &(a->n), &(a->ilo), &(a->ihi), a->scale, &(a->n), a->at, &(a->n), &info); // multiply D^(-T) from the left
	km_check_info(info, rb_eRuntimeError, "unexpected info value", "dgebak");
	a->sat->body = a->at; km_copy2work(a->body.body, a->body.ld, a->sat); // take transpose again

	return Qnil;
}
// ensure half: always runs; frees scratch and writes the result back to smat
static VALUE
km_unbal_ensure(VALUE data)
{
	struct km_unbal_arg *a = (struct km_unbal_arg *)data;

	km_copy_and_free_if_needed(a->smat, &(a->body));
	ruby_xfree(a->sat);
	ruby_xfree(a->at);

	return Qnil;
}
// entry point: undo dgebal's balancing on `smat' in place
static void
km_unbal(SMAT *smat, double *scale, int ilo, int ihi)
{
	// all pointer members start NULL so the ensure handler is always safe
	struct km_unbal_arg a = {smat, scale, ilo, ihi, smat->m, {{NULL}, 0, false}, NULL, NULL};

	km_ensure(km_unbal_body, (VALUE)(&a), km_unbal_ensure, (VALUE)(&a));
}
70
+
71
+ static void
72
+ km_check_finite_func_d(double *ent, int i, int j, void *null)
73
+ {
74
+ if ( !isfinite(*ent) ) {
75
+ rb_raise(km_eUncomp, "the matrix has an illegal (infinite or nan) element at (%d, %d)", i, j);
76
+ }
77
+ }
78
+ static inline void
79
+ km_check_finite(SMAT *smat)
80
+ {
81
+ km_smat_each_with_index_d(smat, km_check_finite_func_d, NULL);
82
+ }
83
+
84
// compute X which satisfies AX=B. `self' is output
struct km_dmat_solve_arg {
	SMAT *sx;            // output X
	LAWORK x;            // LAPACK-ready view of X
	SMAT *sa, *sb;       // inputs A and B
	double *a, *af;      // copy of A and its factored form
	int *ipiv;           // pivot indices from the LU factorization
	double *r, *c, *b;   // row/column scale factors and copy of B
	double *ferr, *berr; // forward/backward error bounds per right-hand side
	double *work;        // dgesvx double workspace (4*n)
	int *iwork;          // dgesvx integer workspace (n)
};
// body half of the rb_ensure pair; allocations are cleaned by the ensure half
VALUE
km_dmat_solve_body(VALUE data)
{
	struct km_dmat_solve_arg *a = (struct km_dmat_solve_arg *)data;

	int n=a->sx->m, nrhs=a->sx->n;
	km_check_size(4, a->sa->m,n, a->sa->n,n, a->sb->m,n, a->sb->n,nrhs);

	int info;
	char equed[2];       // output: equilibration actually performed
	char fact[]="E";     // equilibrate A, then factor
	char trans[]="N";    // solve AX=B (no transpose)
	KALLOCc(a->a, a->sa);
	KALLOC(a->af, n*n);
	KALLOC(a->ipiv, n);
	KALLOC(a->r, n);
	KALLOC(a->c, n);
	KALLOCc(a->b, a->sb);
	KALLOC(a->ferr, nrhs);
	KALLOC(a->berr, nrhs);
	KALLOC(a->work, 4*n);
	KALLOC(a->iwork, n);
	KALLOCn(a->x, a->sx);
	double rcond;

	// expert driver: equilibrates, LU-factors, solves, and refines
	dgesvx_(fact, trans, &n, &nrhs, a->a, &n, a->af, &n, a->ipiv, equed, a->r, a->c, a->b, &n,
		a->x.d, &(a->x.ld), &rcond, a->ferr, a->berr, a->work, a->iwork, &info);
	km_check_info(info, km_eUncomp, "A is singular or near singular", "dgesvx");

	return Qnil;
}
// ensure half: write X back and free everything (frees are no-ops on NULL)
VALUE
km_dmat_solve_ensure(VALUE data)
{
	struct km_dmat_solve_arg *a = (struct km_dmat_solve_arg *)data;

	km_copy_and_free_if_needed(a->sx, &(a->x));
	ruby_xfree(a->iwork);
	ruby_xfree(a->work);
	ruby_xfree(a->berr);
	ruby_xfree(a->ferr);
	ruby_xfree(a->b);
	ruby_xfree(a->c);
	ruby_xfree(a->r);
	ruby_xfree(a->ipiv);
	ruby_xfree(a->af);
	ruby_xfree(a->a);

	return Qnil;
}
// Ruby entry point: self = A \ B; all three must be double matrices
VALUE
km_dmat_solve(VALUE self, VALUE va, VALUE vb)
{
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_dmat_solve_arg a; memset(&a, 0, sizeof(a));
	a.sx = km_mat2smat(self);
	a.sa = km_mat2smat(va);
	a.sb = km_mat2smat(vb);
	km_check_double(3, a.sx, a.sa, a.sb);
	km_check_finite(a.sa); km_check_finite(a.sb);

	km_ensure(km_dmat_solve_body, (VALUE)&a, km_dmat_solve_ensure, (VALUE)&a);

	return self;
}
160
+
161
// compute the inverse matrix using Mat#solve
// self = inverse(A), implemented as solving A * X = I for X
VALUE
km_dmat_inverse(VALUE self, VALUE va)
{
	SMAT *sx = km_mat2smat(self), *sa = km_mat2smat(va);
	km_check_double(2, sx, sa);
	km_check_finite(sa);
	int n = sa->m;
	// both self and A must be square n-by-n
	km_check_size(3, sx->m,n, sx->n,n, sa->n,n);
	// right-hand side is the n-by-n identity matrix
	return km_dmat_solve(self, va, kmm_Mat_identity(km_cMat, INT2NUM(n)));
}
172
+
173
+ // compute X with the smallest norm which minimize ||AX-B||. `self' is output
174
+ struct km_ls_arg {
175
+ SMAT *sx, *sa, *sb;
176
+ double *a, *b, *s, *work;
177
+ int *iwork;
178
+ };
179
+ static VALUE
180
+ km_mat_ls_body(VALUE data)
181
+ {
182
+ struct km_ls_arg *a = (struct km_ls_arg *)data;
183
+ int m=a->sa->m, n=a->sa->n, nrhs=a->sx->n;
184
+ int ldb = MAX(m, n);
185
+ km_check_size(3, a->sx->m,n, a->sb->m,m, a->sb->n,nrhs);
186
+
187
+ int rank, lwork=-1, liwork, info;
188
+ double rcond=-1.0, opt;
189
+ dgelsd_(&m, &n, &nrhs, NULL, &m, NULL, &ldb, NULL, NULL, &rank, &opt, &lwork, &liwork, &info);
190
+ km_check_info_opt(info, "dgelsd");
191
+ lwork = (int)opt;
192
+ KALLOCc(a->a, a->sa);
193
+ KALLOC(a->b, ldb*nrhs); // don't use KALLOCc becaus ldb can be greater than sb->m
194
+ km_copy2work(a->b, ldb, a->sb);
195
+ KALLOC(a->s, MIN(m, n));
196
+ KALLOC(a->work, lwork);
197
+ KALLOC(a->iwork, liwork);
198
+ dgelsd_(&m, &n, &nrhs, a->a, &m, a->b, &ldb, a->s, &rcond, &rank, a->work, &lwork, a->iwork, &info);
199
+ km_check_info(info, rb_eRuntimeError, "compution the SVD faild to converge", "dgelsd");
200
+ km_copy_from_work(a->sx, a->b, ldb);
201
+
202
+ return Qnil;
203
+ }
204
+ static VALUE
205
+ km_mat_ls_ensure(VALUE data)
206
+ {
207
+ struct km_ls_arg *a = (struct km_ls_arg *)data;
208
+
209
+ ruby_xfree(a->iwork);
210
+ ruby_xfree(a->work);
211
+ ruby_xfree(a->s);
212
+ ruby_xfree(a->b);
213
+ ruby_xfree(a->a);
214
+
215
+ return Qnil;
216
+ }
217
+
218
+ VALUE
219
+ kmm_mat__ls(VALUE self, VALUE va, VALUE vb)
220
+ {
221
+ km_check_frozen(self);
222
+ struct km_ls_arg a; memset(&a, 0, sizeof(a));
223
+ a.sx = km_mat2smat(self); a.sa = km_mat2smat(va); a.sb = km_mat2smat(vb);
224
+ km_check_double(3, a.sx, a.sa, a.sb);
225
+ km_check_finite(a.sa); km_check_finite(a.sb);
226
+
227
+ km_ensure(km_mat_ls_body, (VALUE)&a, km_mat_ls_ensure, (VALUE)&a);
228
+
229
+ return self;
230
+ }
231
+
232
// compute X with the smallest norm which minimizes ||A'X-B'||. `self' is output
// rb_ensure-compatible trampoline: unpacks (self, A, B) from a Ruby array
static VALUE
km_mat_ls_wrap(VALUE data)
{
	return kmm_mat__ls(rb_ary_entry(data, 0), rb_ary_entry(data, 1), rb_ary_entry(data, 2));
}
// destructive form: temporarily flips the trans flags (and m/n) of A and B so
// kmm_mat__ls sees A' and B'; km_recover_trans restores them even on raise
VALUE
kmm_mat_tls_destl(VALUE self, VALUE va, VALUE vb)
{
	SMAT *sa = km_mat2smat(va), *sb = km_mat2smat(vb);
	sa->trans = !(sa->trans); SWAP(int, sa->m, sa->n);
	sb->trans = !(sb->trans); SWAP(int, sb->m, sb->n);
	VALUE vab = rb_ary_new3(2, va, vb);
	km_ensure(km_mat_ls_wrap, rb_ary_new3(3, self, va, vb), km_recover_trans, vab);
	return self;
}
// non-destructive form: allocates the (A'-cols x B'-cols) result matrix
VALUE
kmm_mat_tls(VALUE va, VALUE vb)
{
	SMAT *sa = km_mat2smat(va), *sb = km_mat2smat(vb);
	return kmm_mat_tls_destl(km_Mat(sb->n, sa->n, VT_DOUBLE), va, vb);
}
254
+
255
// compute x which minimizes ||A'(b-Ax)|| using the conjugate gradient method. `self' is output
struct km_ls_conj_arg {
	SMAT *sx, *sa, *sb;      // output x, inputs A and b
	double tor;              // relative tolerance on the residual norm
	double *r, *p, *ap, *aa; // CG residual, direction, A'A*p, and A'A
	LAWORK a, b, x;          // LAPACK-ready views of A, b, x
};
// body half of the rb_ensure pair: CG on the normal equations A'Ax = A'b
static VALUE
km_mat_ls_conj_body(VALUE data)
{
	struct km_ls_conj_arg *a = (struct km_ls_conj_arg *)data;

	int m = a->sa->m, n = a->sa->n;
	if ( m < n ) {
		// NOTE(review): the check requires m >= n (a tall A, i.e. A must be
		// column-full-rank for A'A to be positive definite), but the message
		// says "row-full-rank" -- confirm intended wording
		rb_raise(km_eDim, "A must be row-full-rank");
	}
	km_check_size(4, a->sx->m,n, a->sx->n,1, a->sb->m,m, a->sb->n,1);

	const int ione=1; double dzero=0.0, done=1.0;
	KALLOC(a->r, n);
	KALLOC(a->p, n);
	KALLOC(a->ap, n);
	KALLOC(a->aa, n*n);
	KALLOCn(a->a, a->sa);
	KALLOCn(a->b, a->sb);
	KALLOCz(a->x, a->sx); // x starts at zero
	dgemm_("T", "N", &n, &ione, &m, &done, a->a.d, &m, a->b.d, &m, &dzero, a->r, &n); // r = A'b
	dgemm_("T", "N", &n, &n, &m, &done, a->a.d, &m, a->a.d, &m, &dzero, a->aa, &n); // aa = A'A

	dcopy_(&n, a->r, &ione, a->p, &ione); // p = r
	double rr = ddot_(&n, a->r, &ione, a->r, &ione); // rr = ||r||^2
	// stopping threshold: ||r||^2 below (tor * initial ||r||)^2,
	// clamped to DBL_MIN so a zero threshold cannot spin forever
	double th = rr*(a->tor)*(a->tor);
	if ( th < DBL_MIN ) {
		th = DBL_MIN;
	}
	double rrp;
	int i=0; // counts consecutive-ish non-decreasing residual steps
	for (;;) {
		dgemm_("N", "N", &n, &ione, &n, &done, a->aa, &n, a->p, &n, &dzero, a->ap, &n); // ap = Ap
		double alp = rr/ddot_(&n, a->p, &ione, a->ap, &ione); // alp = ||r||^2/(p'Ap)
		daxpy_(&n, &alp, a->p, &ione, a->x.d, &ione); // x += alp*p
		alp = -alp;
		daxpy_(&n, &alp, a->ap, &ione, a->r, &ione); // r -= alp*Ap
		rrp = ddot_(&n, a->r, &ione, a->r, &ione); // rrp = ||r||^2
		if ( rrp < th ) {
			break; // converged
		} else if ( rr < rrp ) {
			// residual grew: give up after n such steps
			i++;
			if ( n < i ) {
				rb_raise(km_eUncomp, "A may not be positive definite");
			}
		}
		double bet = rrp/rr;
		for ( int j=0; j<n; j++ ) {
			a->p[j] = bet*a->p[j]+a->r[j]; // p = r + bet*p
		}
		// the above is faster than the below
		// dscal_(&n, &bet, a->p, &ione);
		// daxpy_(&n, &done, a->r, &ione, a->p, &ione);
		rr = rrp;
	}

	return Qnil;
}
// ensure half: write x back and free all scratch (no-ops on NULL)
static VALUE
km_mat_ls_conj_ensure(VALUE data)
{
	struct km_ls_conj_arg *a = (struct km_ls_conj_arg *)data;

	km_copy_and_free_if_needed(a->sx, &(a->x));
	km_free_if_needed(&(a->b));
	km_free_if_needed(&(a->a));
	ruby_xfree(a->aa);
	ruby_xfree(a->ap);
	ruby_xfree(a->p);
	ruby_xfree(a->r);

	return Qnil;
}
// Ruby entry point: self = CG solution of min ||A'(b-Ax)|| with tolerance vtor
VALUE
kmm_mat__ls_conj(VALUE self, VALUE va, VALUE vb, VALUE vtor)
{
	km_check_frozen(self);
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_ls_conj_arg a; memset(&a, 0, sizeof(a));
	a.sx = km_mat2smat(self); a.sa = km_mat2smat(va); a.sb = km_mat2smat(vb);
	km_check_double(3, a.sx, a.sa, a.sb);
	km_check_finite(a.sa); km_check_finite(a.sb);

	a.tor = NUM2DBL(vtor);
	km_ensure(km_mat_ls_conj_body, (VALUE)&a, km_mat_ls_conj_ensure, (VALUE)&a);

	return self;
}
348
+
349
+
350
+ // compute the solution x of minimize_x ||y|| s.t. d=Ax+By. `self' is output
351
+ // self: m-vector
352
+ // A: (n, m)-matrix
353
+ // B: (n, p)-matrix
354
+ // d: n-vector
355
+ // if B is regular square matrix, it is equivalent to minimize_x ||B\(d-Ax)||
356
+ struct km_glm_arg {
357
+ SMAT *sx, *sa, *sb, *sd;
358
+ double *a, *b, *d, *y, *work;
359
+ LAWORK x;
360
+ };
361
+ static VALUE
362
+ km_mat_glm_body(VALUE data)
363
+ {
364
+ struct km_glm_arg *a = (struct km_glm_arg *)data;
365
+
366
+ int n=a->sa->m, m=a->sa->n, p=a->sb->n; // m, n are swapped from those of sa
367
+ if ( n < m || m+p < n ) {
368
+ rb_raise(km_eDim, "m <= n <= m+p must be satisfied for glm, given are (m, n, p) = (%d, %d, %d)", m, n, p);
369
+ }
370
+ km_check_size(5, a->sb->m,n, MIN(a->sx->m,a->sx->n),1, LENGTH(a->sx),m, MIN(a->sd->m, a->sd->n),1, LENGTH(a->sd),n);
371
+
372
+ double opt; int lwork=-1, info;
373
+ dggglm_(&n, &m, &p, NULL, &n, NULL, &n, NULL, NULL, NULL, &opt, &lwork, &info);
374
+ km_check_info_opt(info, "dggglm");
375
+ lwork = (int)opt;
376
+
377
+ KALLOCc(a->a, a->sa);
378
+ KALLOCc(a->b, a->sb);
379
+ KALLOCc(a->d, a->sd);
380
+ KALLOC(a->y, p);
381
+ KALLOC(a->work, lwork);
382
+ KALLOCn(a->x, a->sx);
383
+
384
+ dggglm_(&n, &m, &p, a->a, &n, a->b, &n, a->d, a->x.d, a->y, a->work, &lwork, &info);
385
+ km_check_info(info, km_eUncomp, "pair (A, B) is not full-rank", "dgglm");
386
+
387
+ return Qnil;
388
+ }
389
+ static VALUE
390
+ km_mat_glm_ensure(VALUE data)
391
+ {
392
+ struct km_glm_arg *a = (struct km_glm_arg *)data;
393
+
394
+ km_copy_and_free_if_needed(a->sx, &(a->x));
395
+ ruby_xfree(a->work);
396
+ ruby_xfree(a->y);
397
+ ruby_xfree(a->d);
398
+ ruby_xfree(a->b);
399
+ ruby_xfree(a->a);
400
+
401
+ return Qnil;
402
+ }
403
+ VALUE
404
+ kmm_mat_glm_destl(VALUE self, VALUE va, VALUE vb, VALUE vd)
405
+ {
406
+ km_check_frozen(self);
407
+ struct km_glm_arg a; memset(&a, 0, sizeof(a));
408
+ a.sx = km_mat2smat(self); a.sa = km_mat2smat(va); a.sb = km_mat2smat(vb); a.sd = km_mat2smat(vd);
409
+ km_check_double(4, a.sx, a.sa, a.sb, a.sd);
410
+ km_check_finite(a.sa); km_check_finite(a.sb); km_check_finite(a.sd);
411
+
412
+ km_ensure(km_mat_glm_body, (VALUE)&a, km_mat_glm_ensure, (VALUE)&a);
413
+
414
+ return self;
415
+ }
416
+ VALUE
417
+ kmm_mat_glm(VALUE va, VALUE vb, VALUE vd)
418
+ {
419
+ return kmm_mat_glm_destl(km_Mat(km_mat2smat(va)->n, 1, VT_DOUBLE), va, vb, vd);
420
+ }
421
+
422
// compute the eigenvalues of a symmetric matrix A. `self' is output
struct km_sym_ev_arg {
	SMAT *sd, *sa;    // sd: output eigenvalue vector, sa: input symmetric matrix
	double *a, *work; // working copy of A and dsyevr double work area
	int *iwork;       // dsyevr integer work area
	LAWORK w;         // LAPACK-ready view of the output vector
};
// body half of the rb_ensure pair; allocations are cleaned by the ensure half
static VALUE
km_sym_ev_body(VALUE data)
{
	struct km_sym_ev_arg *a = (struct km_sym_ev_arg *)data;

	int n = LENGTH(a->sd);
	km_check_size(3, MIN(a->sd->m, a->sd->n),1, a->sa->m,n, a->sa->n,n);

	// abstol = dlamch("S") (safe minimum) requests maximal accuracy from dsyevr
	char cmach[] = "S";
	double abstol = dlamch_(cmach);
	int lwork=-1, liwork=-1;
	double dopt; int m, iopt, info;
	// eigenvalues only ("N"), all of them ("A"), upper triangle stored ("U")
	char jobz[]="N", range[] = "A", upto[] = "U";
	// workspace-size query
	dsyevr_(jobz, range, upto, &n, NULL, &n, NULL, NULL, NULL, NULL, &abstol, &m, NULL, NULL, &n, NULL, &dopt, &lwork, &iopt, &liwork, &info);
	km_check_info_opt(info, "dsyevr");
	lwork = (int)dopt; liwork = iopt;

	KALLOCc(a->a, a->sa);
	KALLOC(a->work, lwork);
	KALLOC(a->iwork, liwork);
	KALLOCn(a->w, a->sd);
	dsyevr_(jobz, range, upto, &n, a->a, &n, NULL, NULL, NULL, NULL, &abstol, &m, a->w.d, NULL, &n, NULL, a->work, &lwork, a->iwork, &liwork, &info);
	km_check_info(info, rb_eRuntimeError, "internal error occured while invoking dsyevr", "dsyevr");

	return Qnil;
}
// ensure half: write the eigenvalues back and free scratch (no-ops on NULL)
static VALUE
km_sym_ev_ensure(VALUE data)
{
	struct km_sym_ev_arg *a = (struct km_sym_ev_arg *)data;

	km_copy_and_free_if_needed(a->sd, &(a->w));
	ruby_xfree(a->iwork);
	ruby_xfree(a->work);
	ruby_xfree(a->a);

	return Qnil;
}
// destructive Ruby entry point: self receives A's eigenvalues as a vector
VALUE
kmm_mat_sym_eigen_values_destl(VALUE self, VALUE va)
{
	km_check_frozen(self);
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_sym_ev_arg a; memset(&a, 0, sizeof(a));
	a.sd = km_mat2smat(self); a.sa = km_mat2smat(va);
	km_check_double(2, a.sd, a.sa);
	km_check_finite(a.sa);

	km_ensure(km_sym_ev_body, (VALUE)&a, km_sym_ev_ensure, (VALUE)&a);

	return self;
}
// non-destructive form: allocates an n-by-1 result vector
VALUE
kmm_mat_sym_eigen_values(VALUE va)
{
	return kmm_mat_sym_eigen_values_destl(km_Mat(km_mat2smat(va)->m, 1, VT_DOUBLE), va);
}
485
+
486
// invoke eigen-decomposition A=VDV' of a symmetric matrix A
// `self' is A and the arguments are outputs
struct km_sym_evd_arg {
	SMAT *sa, *sv, *sd;   // input A, output eigenvector matrix V, output diagonal D
	double *a, *work;     // working copy of A and dsyevr double work area
	int *isuppz, *iwork;  // eigenvector support indices, integer work area
	LAWORK w, z;          // LAPACK-ready views of D and V
};
// body half of the rb_ensure pair; allocations are cleaned by the ensure half
static VALUE
km_sym_evd_body(VALUE data)
{
	struct km_sym_evd_arg *a = (struct km_sym_evd_arg *)data;

	int n = a->sa->m;
	km_check_size(5, a->sa->n,n, a->sv->m,n, a->sv->n,n, a->sd->m,n, a->sd->n,n);

	// abstol = dlamch("S") (safe minimum) requests maximal accuracy from dsyevr
	char cmach[] = "S";
	double dopt, abstol = dlamch_(cmach);
	int m, lwork=-1, liwork=-1, iopt, info;
	// eigenvalues AND vectors ("V"), all of them ("A"), upper triangle ("U")
	char jobz[] = "V", range[] = "A", upto[] = "U";
	// workspace-size query
	dsyevr_(jobz, range, upto, &n, NULL, &n, NULL, NULL, NULL, NULL, &abstol, &m, NULL, NULL, &n, NULL, &dopt, &lwork, &iopt, &liwork, &info);
	km_check_info_opt(info, "dsyevr");
	lwork = (int)dopt; liwork = iopt;

	KALLOCc(a->a, a->sa);
	KALLOC(a->isuppz, 2*n);
	KALLOC(a->work, lwork);
	KALLOC(a->iwork, liwork);
	KALLOCz(a->w, a->sd); // D starts zeroed; eigenvalues land in its first column
	KALLOCn(a->z, a->sv);

	dsyevr_(jobz, range, upto, &n, a->a, &n, NULL, NULL, NULL, NULL, &abstol, &m,
		a->w.d, a->z.d, &(a->z.ld), a->isuppz, a->work, &lwork, a->iwork, &liwork, &info);
	km_check_info(info, rb_eRuntimeError, "internal error occured while invoking dsyevr", "dsyevr");
	// spread the eigenvalue column onto the diagonal of D
	km_vec2diag(n, n, a->w.d, a->w.ld);

	return Qnil;
}
// ensure half: write V and D back and free scratch (no-ops on NULL)
static VALUE
km_sym_evd_ensure(VALUE data)
{
	struct km_sym_evd_arg *a = (struct km_sym_evd_arg *)data;

	km_copy_and_free_if_needed(a->sv, &(a->z));
	km_copy_and_free_if_needed(a->sd, &(a->w));
	ruby_xfree(a->iwork);
	ruby_xfree(a->work);
	ruby_xfree(a->isuppz);
	ruby_xfree(a->a);

	return Qnil;
}
// destructive Ruby entry point: fills vv with V and vd with D
// NOTE(review): returns Qnil, unlike the other *_destl entry points which
// return self -- confirm this is intentional
VALUE
kmm_mat_sym_evd_destl(VALUE self, VALUE vv, VALUE vd)
{
	km_check_frozen(vv); km_check_frozen(vd);
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_sym_evd_arg a; memset(&a, 0, sizeof(a));
	a.sa = km_mat2smat(self); a.sv = km_mat2smat(vv); a.sd = km_mat2smat(vd);
	km_check_double(3, a.sa, a.sv, a.sd);
	km_check_finite(a.sa);

	km_ensure(km_sym_evd_body, (VALUE)&a, km_sym_evd_ensure, (VALUE)&a);

	return Qnil;
}
// non-destructive form: returns [V, D]
VALUE
kmm_mat_sym_evd(VALUE self)
{
	int n = km_mat2smat(self)->n;
	VALUE vv = km_Mat(n, n, VT_DOUBLE);
	VALUE vd = km_Mat(n, n, VT_DOUBLE);
	kmm_mat_sym_evd_destl(self, vv, vd);
	return rb_ary_new3(2, vv, vd);
}
560
+
561
+ // compute eigenvalues of a non-symmetric matrix A. the output `self' is a complex matrix
562
+ struct km_ge_eigen_values_arg {
563
+ SMAT *sd, *sa;
564
+ double *a, *wr, *wi, *scale, *work;
565
+ };
566
+ static void
567
+ km_ge_ev_cpack(COMPLEX *ent, int i, int j, void *data)
568
+ {
569
+ struct km_ge_eigen_values_arg *a = (struct km_ge_eigen_values_arg *)data;
570
+ *ent = cpack(a->wr[i+j], a->wr[i+j]); // i==0 for column-vector or j==0 for row-vector
571
+ }
572
// body half of the rb_ensure pair: run dgeevx (eigenvalues only) and pack
// the (wr, wi) pairs into the complex output vector
static VALUE
km_ge_eigen_values_body(VALUE data)
{
	struct km_ge_eigen_values_arg *a = (struct km_ge_eigen_values_arg *)data;

	int n = LENGTH(a->sd);
	km_check_size(3, MIN(a->sd->m, a->sd->n),1, a->sa->m,n, a->sa->n,n);

	double opt;
	int lwork=-1, ilo, ihi, info;
	// balance both ways ("B"), no left/right eigenvectors, no condition numbers
	char balanc[]="B", jobvl[]="N", jobvr[]="N", sense[]="N";
	// workspace-size query
	dgeevx_(balanc, jobvl, jobvr, sense, &n, NULL, &n, NULL, NULL, NULL, &n, NULL, &n, &ilo, &ihi, NULL, NULL, NULL, NULL, &opt, &lwork, NULL, &info);
	km_check_info_opt(info, "dgeevx");
	lwork = (int)opt;

	KALLOCc(a->a, a->sa);
	KALLOC(a->wr, n);
	KALLOC(a->wi, n);
	KALLOC(a->scale, n);
	KALLOC(a->work, lwork);

	double abnrm;
	dgeevx_(balanc, jobvl, jobvr, sense, &n, a->a, &n, a->wr, a->wi, NULL, &n, NULL, &n, &ilo, &ihi, a->scale, &abnrm, NULL, NULL, a->work, &lwork, NULL, &info);
	km_check_info(info, rb_eRuntimeError, "the QR algorithm failed to compute all the eigenvalues", "dgeevx");
	// write cpack(wr[k], wi[k]) into each entry of the complex output vector
	km_smat_each_with_index_z(a->sd, km_ge_ev_cpack, a);

	return Qnil;
}
// ensure half: free all scratch buffers (no-ops on NULL)
static VALUE
km_ge_eigen_values_ensure(VALUE data)
{
	struct km_ge_eigen_values_arg *a = (struct km_ge_eigen_values_arg *)data;

	ruby_xfree(a->work);
	ruby_xfree(a->scale);
	ruby_xfree(a->wi);
	ruby_xfree(a->wr);
	ruby_xfree(a->a);

	return Qnil;
}
// destructive Ruby entry point: self (complex vector) receives A's eigenvalues
VALUE
kmm_mat_ge_eigen_values_destl(VALUE self, VALUE va)
{
	km_check_frozen(self);
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_ge_eigen_values_arg a; memset(&a, 0, sizeof(a));
	a.sd = km_mat2smat(self); a.sa = km_mat2smat(va);
	km_check_complex(1, a.sd); km_check_double(1, a.sa);
	km_check_finite(a.sa);

	km_ensure(km_ge_eigen_values_body, (VALUE)&a, km_ge_eigen_values_ensure, (VALUE)&a);

	return self;
}
// non-destructive form: allocates an n-by-1 complex result vector
VALUE
kmm_mat_ge_eigen_values(VALUE va)
{
	return kmm_mat_ge_eigen_values_destl(km_Mat(km_mat2smat(va)->m, 1, VT_COMPLEX), va);
}
631
+
632
// compute a matrix consisting of right-eigenvectors V and a diagonal matrix
// consisting of right-eigenvalues D, AV=DV of a non-symmetric matrix A.
// the arguments are outputs
struct km_ge_evd_arg {
	SMAT *sa, *sv, *sd;                  // input A, output V (double), output D (complex)
	double *a, *wr, *wi, *scale, *work;  // A copy, real/imag parts, balance scales, work
	LAWORK vr;                           // LAPACK-ready view of V
};
// body half of the rb_ensure pair; allocations are cleaned by the ensure half
static VALUE
km_ge_evd_body(VALUE data)
{
	struct km_ge_evd_arg *a = (struct km_ge_evd_arg *)data;

	int n = a->sa->m;
	km_check_size(5, a->sa->n,n, a->sv->m,n, a->sv->n,n, a->sd->m,n, a->sd->n,n);

	double opt; int lwork=-1, ilo, ihi, info;
	// balance both ways ("B"), right eigenvectors only ("V"), no condition numbers
	char balanc[]="B", jobvl[]="N", jobvr[]="V", sense[]="N";
	// workspace-size query
	dgeevx_(balanc, jobvl, jobvr, sense, &n, NULL, &n, NULL, NULL, NULL, &n, NULL, &n,
		&ilo, &ihi, NULL, NULL, NULL, NULL, &opt, &lwork, NULL, &info);
	km_check_info_opt(info, "dgeevx");
	lwork = (int)opt;

	KALLOCc(a->a, a->sa);
	KALLOC(a->wr, n);
	KALLOC(a->wi, n);
	KALLOC(a->scale, n);
	KALLOC(a->work, lwork);
	KALLOCn(a->vr, a->sv);

	double abnrm;
	// NOTE(review): &n is passed as ldvr rather than &(a->vr.ld) -- safe only
	// if KALLOCn guarantees vr.ld == n here; confirm against working.c
	dgeevx_(balanc, jobvl, jobvr, sense, &n, a->a, &n, a->wr, a->wi, NULL, &n, a->vr.d, &n,
		&ilo, &ihi, a->scale, &abnrm, NULL, NULL, a->work, &lwork, NULL, &info);
	km_check_info(info, rb_eRuntimeError, "the QR algorithm failed to compute all the eigenvalues", "dgeevx");
	// write the complex eigenvalues onto the diagonal of D (zeroed beforehand
	// by the caller); pick the accessor matching D's storage type
	if ( a->sd->stype == ST_RSUB ) {
		for ( int i=0; i<n; i++ ) {
			ENTITYr0(a->sd, z, i+i*(a->sd->ld)) = cpack(a->wr[i], a->wi[i]);
		}
	} else {
		for ( int i=0; i<n; i++ ) {
			ENTITYd0(a->sd, z, i+i*(a->sd->ld)) = cpack(a->wr[i], a->wi[i]);
		}
	}

	return Qnil;
}
// ensure half: write V back and free all scratch (no-ops on NULL)
static VALUE
km_ge_evd_ensure(VALUE data)
{
	struct km_ge_evd_arg *a = (struct km_ge_evd_arg *)data;

	km_copy_and_free_if_needed(a->sv, &(a->vr));
	ruby_xfree(a->work);
	ruby_xfree(a->scale);
	ruby_xfree(a->wi);
	ruby_xfree(a->wr);
	ruby_xfree(a->a);

	return Qnil;
}
// destructive Ruby entry point: fills vv with V and vd (complex) with D
VALUE
kmm_mat_ge_evd_destl(VALUE self, VALUE vv, VALUE vd)
{
	km_check_frozen(vv); km_check_frozen(vd);
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_ge_evd_arg a; memset(&a, 0, sizeof(a));
	a.sa = km_mat2smat(self); a.sv = km_mat2smat(vv); a.sd = km_mat2smat(vd);
	km_check_double(2, a.sa, a.sv); km_check_complex(1, a.sd);
	km_check_finite(a.sa);

	// D is written only on its diagonal, so clear it first
	kmm_mat_zero(vd);
	km_ensure(km_ge_evd_body, (VALUE)&a, km_ge_evd_ensure, (VALUE)&a);

	return self;
}
// non-destructive form: returns [V, D]
VALUE
kmm_mat_ge_evd(VALUE self)
{
	int n = km_mat2smat(self)->n;
	VALUE vv = km_Mat(n, n, VT_DOUBLE);
	VALUE vd = km_Mat(n, n, VT_COMPLEX);
	kmm_mat_ge_evd_destl(self, vv, vd);
	return rb_ary_new3(2, vv, vd);
}
714
+
715
// compute singular values of A and store them into `self'
// compute singular values of A. `self' is output
struct km_singular_values_arg {
	SMAT *ss, *sa;    // ss: output singular-value vector, sa: input matrix
	double *a, *work; // working copy of A and dgesdd double work area
	int *iwork;       // dgesdd integer work area
	LAWORK s;         // LAPACK-ready view of the output vector
};
// body half of the rb_ensure pair; allocations are cleaned by the ensure half
static VALUE
km_singular_values_body(VALUE data)
{
	struct km_singular_values_arg *a = (struct km_singular_values_arg *)data;

	int m = a->sa->m, n = a->sa->n;
	km_check_size(2, LENGTH(a->ss),MIN(m, n), MIN(a->ss->m,a->ss->n),1);

	// workspace-size query; jobz "N" = singular values only, no U/V'
	double opt; int lwork=-1, info;
	char jobz[] = "N";
	dgesdd_(jobz, &m, &n, NULL, &m, NULL, NULL, &m, NULL, &n, &opt, &lwork, NULL, &info);
	km_check_info_opt(info, "dgesdd");
	lwork = (int)opt;

	KALLOCc(a->a, a->sa);
	KALLOC(a->work, lwork);
	KALLOC(a->iwork, 8*MIN(m, n)); // size required by dgesdd
	KALLOCn(a->s, a->ss);

	dgesdd_(jobz, &m, &n, a->a, &m, a->s.d, NULL, &m, NULL, &n, a->work, &lwork, a->iwork, &info);
	km_check_info(info, rb_eRuntimeError, "DBDSDC did not converge", "dgesdd");

	return Qnil;
}
// ensure half: write the singular values back and free scratch (no-ops on NULL)
static VALUE
km_singular_values_ensure(VALUE data)
{
	struct km_singular_values_arg *a = (struct km_singular_values_arg *)data;

	km_copy_and_free_if_needed(a->ss, &(a->s));
	ruby_xfree(a->iwork);
	ruby_xfree(a->work);
	ruby_xfree(a->a);

	return Qnil;
}
// destructive Ruby entry point: self receives A's singular values as a vector
VALUE
kmm_mat_singular_values_destl(VALUE self, VALUE va)
{
	km_check_frozen(self);
	// zero the arg struct so the ensure handler never frees garbage pointers
	struct km_singular_values_arg a; memset(&a, 0, sizeof(a));
	a.ss = km_mat2smat(self); a.sa = km_mat2smat(va);
	km_check_double(2, a.ss, a.sa);
	km_check_finite(a.sa);

	km_ensure(km_singular_values_body, (VALUE)&a, km_singular_values_ensure, (VALUE)&a);

	return self;
}
// non-destructive form: allocates a min(m, n)-by-1 result vector
VALUE
kmm_mat_singular_values(VALUE va)
{
	SMAT *sa = km_mat2smat(va);
	return kmm_mat_singular_values_destl(km_Mat(MIN(sa->m, sa->n), 1, VT_DOUBLE), va);
}
778
+
779
+ // invoke singular value decomposition A=USV'
780
+ // `self' is A and the arguments are the outputs U, S and V
781
+ struct km_svd_arg {
782
+ SMAT *sa, *ss, *su, *sv;
783
+ double *a, *work;
784
+ int *iwork;
785
+ LAWORK s, u, vt;
786
+ };
787
+ static VALUE
788
+ km_svd_body(VALUE data)
789
+ {
790
+ struct km_svd_arg *a = (struct km_svd_arg *)data;
791
+
792
+ int m = a->sa->m, n = a->sa->n;
793
+ km_check_size(6, a->su->m,m, a->su->n,m, a->ss->m,m, a->ss->n,n, a->sv->m,n, a->sv->n,n);
794
+
795
+ double opt; int lwork=-1, info;
796
+ char jobz[] = "A";
797
+ dgesdd_(jobz, &m, &n, NULL, &m, NULL, NULL, &m, NULL, &n, &opt, &lwork, NULL, &info);
798
+ km_check_info_opt(info, "dgesdd");
799
+ lwork = (int)opt;
800
+
801
+ KALLOCc(a->a, a->sa);
802
+ KALLOC(a->work, lwork);
803
+ KALLOC(a->iwork, 8*MIN(m, n));
804
+ KALLOCz(a->s, a->ss);
805
+ KALLOCn(a->u, a->su);
806
+ KALLOCn(a->vt, a->sv);
807
+
808
+ dgesdd_(jobz, &m, &n, a->a, &m, a->s.d, a->u.d, &(a->u.ld), a->vt.d, &(a->vt.ld), a->work, &lwork, a->iwork, &info);
809
+ km_check_info(info, rb_eRuntimeError, "DBDSDC did not converge", "dgesvd");
810
+ km_vec2diag(m, n, a->s.d, a->s.ld);
811
+
812
+ return Qnil;
813
+ }
814
+ static VALUE
815
+ km_svd_ensure(VALUE data)
816
+ {
817
+ struct km_svd_arg *a = (struct km_svd_arg *)data;
818
+
819
+ km_copy_and_free_if_needed(a->sv, &(a->vt));
820
+ a->sv->trans = !(a->sv->trans);
821
+ km_copy_and_free_if_needed(a->su, &(a->u));
822
+ km_copy_and_free_if_needed(a->ss, &(a->s));
823
+ ruby_xfree(a->iwork);
824
+ ruby_xfree(a->work);
825
+ ruby_xfree(a->a);
826
+
827
+ return Qnil;
828
+ }
829
+ VALUE
830
+ kmm_mat_svd_destl(VALUE self, VALUE vu, VALUE vs, VALUE vv)
831
+ {
832
+ km_check_frozen(vu); km_check_frozen(vs); km_check_frozen(vv);
833
+ struct km_svd_arg a; memset(&a, 0, sizeof(a));
834
+ a.sa = km_mat2smat(self); a.su = km_mat2smat(vu); a.ss = km_mat2smat(vs); a.sv = km_mat2smat(vv);
835
+ km_check_double(4, a.sa, a.su, a.ss, a.sv);
836
+ km_check_finite(a.sa);
837
+
838
+ km_ensure(km_svd_body, (VALUE)&a, km_svd_ensure, (VALUE)&a);
839
+
840
+ return self;
841
+ }
842
+ VALUE
843
+ kmm_mat_svd(VALUE self)
844
+ {
845
+ SMAT *sa = km_mat2smat(self);
846
+ int m = sa->m, n = sa->n;
847
+ VALUE vu = km_Mat(m, m, VT_DOUBLE);
848
+ VALUE vs = km_Mat(m, n, VT_DOUBLE);
849
+ VALUE vv = km_Mat(n, n, VT_DOUBLE);
850
+ kmm_mat_svd_destl(self, vu, vs, vv);
851
+ return rb_ary_new3(3, vu, vs, vv);
852
+ }
853
+
854
+ // symmetrize keeping AA' identical, where `self' is A.
855
+ // this is using singular vale decompsition A=USV' and returns USU'.
856
+ struct km_svd_symmetrize_arg {
857
+ SMAT *sa;
858
+ double *work, *u, *s, *vt;
859
+ int *iwork;
860
+ LAWORK a;
861
+ };
862
+ VALUE km_svd_symmetrize_body(VALUE data)
863
+ {
864
+ struct km_svd_symmetrize_arg *a = (struct km_svd_symmetrize_arg *)data;
865
+
866
+ int n = a->sa->m;
867
+ km_check_size(1, a->sa->n,n);
868
+
869
+ double opt; int lwork=-1, info;
870
+ char jobz[] = "A";
871
+ dgesdd_(jobz, &n, &n, NULL, &n, NULL, NULL, &n, NULL, &n, &opt, &lwork, NULL, &info);
872
+ km_check_info_opt(info, "dgesdd");
873
+ lwork = (int)opt;
874
+
875
+ KALLOCn(a->a, a->sa);
876
+ KALLOC(a->work, lwork);
877
+ KALLOC(a->iwork, 8*n);
878
+ KALLOC(a->s, n);
879
+ KALLOC(a->u, n*n);
880
+ KALLOC(a->vt, n*n);
881
+
882
+ dgesdd_(jobz, &n, &n, a->a.d, &(a->a.ld), a->s, a->u, &n, a->vt, &n, a->work, &lwork, a->iwork, &info);
883
+ km_check_info(info, rb_eRuntimeError, "DBDSDC did not converge", "dgesvd");
884
+
885
+ int one=1;
886
+ memset(a->vt, 0, sizeof(double)*(size_t)(n*n));
887
+ for (int i=0; i<n; i++) {
888
+ daxpy_(&n, a->s+i, a->u+i*n, &one, a->vt+i*n, &one);
889
+ }
890
+ char ta[] = "N";
891
+ char tb[] = "T";
892
+ double alpha = 1.0, beta=0.0;
893
+ dgemm_(ta, tb, &n, &n, &n, &alpha, a->vt, &n, a->u, &n, &beta, a->a.d, &(a->a.ld));
894
+
895
+ return Qnil;
896
+ }
897
// rb_ensure cleanup for km_svd_symmetrize_body: release all temporaries
// (in reverse order of allocation) and write the result back to `sa' if
// the LAWORK buffer was a copy. Safe only if the arg struct was zeroed.
VALUE km_svd_symmetrize_ensure(VALUE data)
{
	struct km_svd_symmetrize_arg *a = (struct km_svd_symmetrize_arg *)data;

	ruby_xfree(a->vt);
	ruby_xfree(a->u);
	ruby_xfree(a->s);
	ruby_xfree(a->iwork);
	ruby_xfree(a->work);
	km_copy_and_free_if_needed(a->sa, &(a->a));

	return Qnil;
}
910
+ VALUE
911
+ kmm_mat_svd_symmetrize_dest(VALUE self)
912
+ {
913
+ km_check_frozen(self);
914
+ struct km_svd_symmetrize_arg a;
915
+ a.sa = km_mat2smat(self);
916
+ km_check_double(1, a.sa);
917
+ km_check_finite(a.sa);
918
+
919
+ km_ensure(km_svd_symmetrize_body, (VALUE)&a, km_svd_symmetrize_ensure, (VALUE)&a);
920
+
921
+ return kmm_mat_symmetrize_dest(self);
922
+ }
923
+
924
// invoke a LU decomposition A=LU. `self' is A
// L is a permutated lower triangular matrix and U is a upper triangular matrix
struct km_lu_arg {
	SMAT *sa, *spl, *su; // input A (m x n), outputs PL (m x k) and U (k x n), k = MIN(m, n)
	double *a;           // working copy of A, factored in place by dgetrf
	int *ipiv, *perm;    // LAPACK 1-based pivot swaps and the decoded 0-based row permutation
	LAWORK pl, u;        // LAPACK-side views of the output matrices
};
932
+ static VALUE
933
+ km_lu_body(VALUE data)
934
+ {
935
+ struct km_lu_arg *a = (struct km_lu_arg *)data;
936
+
937
+ int m = a->sa->m, n = a->sa->n;
938
+ int k = MIN(m, n);
939
+ km_check_size(4, a->spl->m,m, a->spl->n,k, a->su->m,k, a->su->n,n);
940
+
941
+ KALLOCc(a->a, a->sa);
942
+ KALLOC(a->ipiv, k);
943
+ KALLOC(a->perm, m);
944
+ KALLOCn(a->pl, a->spl);
945
+ KALLOCn(a->u, a->su);
946
+ for (int i=0; i<m; i++ ) {
947
+ (a->perm)[i] = i;
948
+ }
949
+
950
+ int info;
951
+ dgetrf_(&m, &n, a->a, &m, a->ipiv, &info);
952
+ km_check_info(info, Qnil, NULL, "dgetrf");
953
+ for ( int i=0; i<k; i++ ) {
954
+ int s = (a->ipiv)[i]-1;
955
+ if ( s != i ) {
956
+ SWAP(int, (a->perm)[i], (a->perm)[s]);
957
+ }
958
+ }
959
+ for ( int i=0; i<k; i++ ) {
960
+ int s = (a->perm)[i];
961
+ dcopy_(&i, (a->a)+i, &m, (a->pl.d)+s, &(a->pl.ld));
962
+ (a->pl.d)[s+i*(a->pl.ld)] = 1.0;
963
+ int izero = 0, len = k-i-1; double dzero = 0.0;
964
+ dcopy_(&len, &dzero, &izero, (a->pl.d)+(s+(i+1)*(a->pl.ld)), &(a->pl.ld));
965
+ dcopy_(&i, &dzero, &izero, (a->u.d)+i, &(a->u.ld));
966
+ len = n-i;
967
+ dcopy_(&len, (a->a)+(i+i*m), &m, (a->u.d)+(i+i*(a->u.ld)), &(a->u.ld));
968
+ }
969
+ for ( int i=m; i<k; i++ ) {
970
+ dcopy_(&n, a->a+i, &m, (a->pl.d)+(a->perm)[i], &(a->pl.ld));
971
+ }
972
+
973
+ return Qnil;
974
+ }
975
// rb_ensure cleanup for km_lu_body: write back the outputs if the LAWORK
// buffers were copies, then free temporaries in reverse allocation order
static VALUE
km_lu_ensure(VALUE data)
{
	struct km_lu_arg *a = (struct km_lu_arg *)data;

	km_copy_and_free_if_needed(a->su, &(a->u));
	km_copy_and_free_if_needed(a->spl, &(a->pl));
	ruby_xfree(a->perm);
	ruby_xfree(a->ipiv);
	ruby_xfree(a->a);

	return Qnil;
}
988
+ VALUE
989
+ kmm_mat_lu_destl(VALUE self, VALUE vpl, VALUE vu)
990
+ {
991
+ km_check_frozen(vpl); km_check_frozen(vu);
992
+ struct km_lu_arg a; memset(&a, 0, sizeof(a));
993
+ a.sa = km_mat2smat(self); a.spl = km_mat2smat(vpl); a.su = km_mat2smat(vu);
994
+ km_check_double(3, a.sa, a.spl, a.su);
995
+ km_check_finite(a.sa);
996
+
997
+ km_ensure(km_lu_body, (VALUE)&a, km_lu_ensure, (VALUE)&a);
998
+
999
+ return Qnil;
1000
+ }
1001
+ VALUE
1002
+ kmm_mat_lu(VALUE self)
1003
+ {
1004
+ SMAT *sa = km_mat2smat(self);
1005
+ int m = sa->m, n = sa->n;
1006
+ int k = MIN(m, n);
1007
+ VALUE vpl = km_Mat(m, k, VT_DOUBLE);
1008
+ VALUE vu = km_Mat(k, n, VT_DOUBLE);
1009
+ kmm_mat_lu_destl(self, vpl, vu);
1010
+ return rb_ary_new3(2, vpl, vu);
1011
+ }
1012
+
1013
// invoke a LUP decomposition PA=LU. `self' is A
// L is a lower triangular matrix, U is a upper triangular matrix and P is a permutation matrix
struct km_lup_arg {
	SMAT *sa, *sl, *su, *sp; // input A (m x n), outputs L (m x k), U (k x n), P (m x m), k = MIN(m, n)
	double *a;               // working copy of A, factored in place by dgetrf
	int *ipiv, *perm;        // LAPACK 1-based pivot swaps and the decoded 0-based permutation
	LAWORK p, l, u;          // LAPACK-side views of the output matrices
};
1021
+ static VALUE
1022
+ km_lup_body(VALUE data)
1023
+ {
1024
+ struct km_lup_arg *a = (struct km_lup_arg *)data;
1025
+
1026
+ int m = a->sa->m, n = a->sa->n;
1027
+ int k = MIN(m, n);
1028
+ km_check_size(6, a->sl->m,m, a->sl->n,k, a->su->m,k, a->su->n,n, a->sp->m,m, a->sp->n,m);
1029
+
1030
+ KALLOCc(a->a, a->sa);
1031
+ KALLOC(a->ipiv, k);
1032
+ KALLOC(a->perm, m);
1033
+ KALLOCz(a->p, a->sp);
1034
+ KALLOCn(a->l, a->sl);
1035
+ KALLOCn(a->u, a->su);
1036
+ for ( int i=0; i<m; i++ ) {
1037
+ (a->perm)[i] = i;
1038
+ }
1039
+
1040
+ int info;
1041
+ dgetrf_(&m, &n, a->a, &m, a->ipiv, &info);
1042
+ km_check_info(info, Qnil, NULL, "dgetrf");
1043
+ for ( int i=0; i<k; i++ ) {
1044
+ int s = (a->ipiv)[i]-1;
1045
+ if ( s != i ) {
1046
+ SWAP(int, (a->perm)[i], (a->perm)[s]);
1047
+ }
1048
+ }
1049
+ for ( int i=0; i<k; i++ ) {
1050
+ (a->p.d)[i+(a->perm)[i]*(a->p.ld)] = 1.0;
1051
+ dcopy_(&i, (a->a)+i, &m, (a->l.d)+i, &(a->l.ld));
1052
+ (a->l.d)[i+i*(a->l.ld)] = 1.0;
1053
+ int len = k-i-1, izero = 0; double dzero = 0.0;
1054
+ dcopy_(&len, &dzero, &izero, (a->l.d)+(i+(i+1)*(a->l.ld)), &(a->l.ld));
1055
+ dcopy_(&i, &dzero, &izero, (a->u.d)+i, &(a->u.ld));
1056
+ len = n-i;
1057
+ dcopy_(&len, (a->a)+(i+i*m), &m, (a->u.d)+(i+i*(a->u.ld)), &(a->u.ld));
1058
+ }
1059
+ for ( int i=m; i<k; i++ ) {
1060
+ (a->p.d)[i+(a->perm)[i]*(a->p.ld)] = 1.0;
1061
+ dcopy_(&n, (a->a)+i, &m, (a->l.d)+i, &(a->l.ld));
1062
+ }
1063
+
1064
+ return Qnil;
1065
+ }
1066
// rb_ensure cleanup for km_lup_body: write back the outputs if the LAWORK
// buffers were copies, then free temporaries in reverse allocation order
static VALUE
km_lup_ensure(VALUE data)
{
	struct km_lup_arg *a = (struct km_lup_arg *)data;

	km_copy_and_free_if_needed(a->su, &(a->u));
	km_copy_and_free_if_needed(a->sl, &(a->l));
	km_copy_and_free_if_needed(a->sp, &(a->p));
	ruby_xfree(a->perm);
	ruby_xfree(a->ipiv);
	ruby_xfree(a->a);

	return Qnil;
}
1080
+ VALUE
1081
+ kmm_mat_lup_destl(VALUE self, VALUE vl, VALUE vu, VALUE vp)
1082
+ {
1083
+ km_check_frozen(vl); km_check_frozen(vu); km_check_frozen(vp);
1084
+ struct km_lup_arg a; memset(&a, 0, sizeof(a));
1085
+ a.sa = km_mat2smat(self); a.sl = km_mat2smat(vl);
1086
+ a.su = km_mat2smat(vu); a.sp = km_mat2smat(vp);
1087
+ km_check_double(4, a.sa, a.sl, a.su, a.sp);
1088
+ km_check_finite(a.sa);
1089
+
1090
+ km_ensure(km_lup_body, (VALUE)&a, km_lup_ensure, (VALUE)&a);
1091
+
1092
+ return Qnil;
1093
+ }
1094
+ VALUE
1095
+ kmm_mat_lup(VALUE self)
1096
+ {
1097
+ SMAT *sa = km_mat2smat(self);
1098
+ int m = sa->m, n = sa->n; int k = MIN(m, n);
1099
+ VALUE vl = km_Mat(m, k, VT_DOUBLE);
1100
+ VALUE vu = km_Mat(k, n, VT_DOUBLE);
1101
+ VALUE vp = km_Mat(m, m, VT_DOUBLE);
1102
+ kmm_mat_lup_destl(self, vl, vu, vp);
1103
+ return rb_ary_new3(3, vl, vu, vp);
1104
+ }
1105
+
1106
// compute the determinant via LU decomposition
struct km_det_arg {
	SMAT *sa;  // input square matrix A
	double *a; // working copy of A, factored in place by dgetrf
	int *ipiv; // LAPACK 1-based pivot indices (used for the sign of det)
};
1112
// rb_ensure body: det(A) = (permutation sign) * product of U's diagonal,
// where A = P*L*U from dgetrf. returns the determinant as a Ruby Float.
static VALUE
km_det_body(VALUE data)
{
	struct km_det_arg *a = (struct km_det_arg *)data;

	int n = a->sa->m;
	km_check_size(1, a->sa->n,n); // `self' must be square
	if ( n == 0 ) { return rb_float_new(1.0); } // empty product convention: det = 1

	KALLOCc(a->a, a->sa);
	KALLOC(a->ipiv, n);
	int info;
	dgetrf_(&n, &n, a->a, &n, a->ipiv, &info);
	km_check_info(info, Qnil, NULL, "dgetrf");
	// accumulate the diagonal product in place at (a->a)[0] (the working copy
	// is discarded afterwards); flip `neg' once per actual row interchange
	// (ipiv entries are 1-based)
	bool neg = ((a->ipiv)[0] != 1);
	for ( int i=1; i<n; i++ ) {
		(a->a)[0] *= (a->a)[i+i*n];
		if ( (a->ipiv)[i]-1 != i ) {
			neg = !neg;
		}
	}

	return rb_float_new( neg ? -(a->a)[0] : (a->a)[0] );
}
1136
// rb_ensure cleanup for km_det_body: free the pivot array and working copy
static VALUE
km_det_ensure(VALUE data)
{
	struct km_det_arg *a = (struct km_det_arg *)data;

	ruby_xfree(a->ipiv);
	ruby_xfree(a->a);

	return Qnil;
}
1146
+ VALUE
1147
+ kmm_mat_det(VALUE self)
1148
+ {
1149
+ struct km_det_arg a; memset(&a, 0, sizeof(a));
1150
+ a.sa = km_mat2smat(self);
1151
+ km_check_double(1, a.sa);
1152
+ km_check_finite(a.sa);
1153
+
1154
+ return km_ensure(km_det_body, (VALUE)&a, km_det_ensure, (VALUE)&a);
1155
+ }
1156
+
1157
// invoke a QR decomposition A=QR. the arguments are outputs
struct km_qr_arg {
	SMAT *sa, *sq, *sr;     // input A (m x n), outputs Q (m x m) and R (m x n)
	double *a, *tau, *work; // working copy of A, Householder scalars, LAPACK workspace
	LAWORK q, r;            // LAPACK-side views of the output matrices
};
1163
+ static VALUE
1164
+ km_qr_body(VALUE data)
1165
+ {
1166
+ struct km_qr_arg *a = (struct km_qr_arg *)data;
1167
+
1168
+ int m = a->sa->m, n = a->sa->n;
1169
+ int k = MIN(m, n);
1170
+ km_check_size(4, a->sq->m,m, a->sq->n,m, a->sr->m,m, a->sr->n,n);
1171
+
1172
+ double opt; int lwork=-1, info;
1173
+ dgeqrf_(&m, &n, NULL, &m, NULL, &opt, &lwork, &info);
1174
+ km_check_info_opt(info, "dgeqrf");
1175
+ lwork = (int)opt;
1176
+ KALLOCc(a->a, a->sa);
1177
+ KALLOC(a->tau, k);
1178
+ KALLOC(a->work, lwork);
1179
+ dgeqrf_(&m, &n, a->a, &m, a->tau, a->work, &lwork, &info);
1180
+ km_check_info(info, rb_eRuntimeError, "unexpected info value", "dgeqrf");
1181
+ ruby_xfree(a->work);
1182
+
1183
+ lwork = -1;
1184
+ dorgqr_(&m, &m, &k, NULL, &m, NULL, &opt, &lwork, &info); // the second argument N is &m
1185
+ km_check_info_opt(info, "dorgqr");
1186
+ lwork = (int)opt;
1187
+ KALLOC(a->work, lwork);
1188
+ KALLOCn(a->q, a->sq);
1189
+ KALLOCn(a->r, a->sr);
1190
+ for ( int i=m; i<k; i++ ) { // 0 clear (i>=k)-th rows of r and (i>=k)-th columns of q
1191
+ int izero=0; double dzero=0.0;
1192
+ dcopy_(&n, &dzero, &izero, (a->r.d)+i, &(a->r.ld));
1193
+ int ione=1;
1194
+ dcopy_(&m, &dzero, &izero, (a->q.d)+i*(a->q.ld), &(ione));
1195
+ }
1196
+ for ( int i=0; i<k; i++ ) { // copy the results of `dgeqrf' to q.d, rd, construct R and prepare to call `dorgqr'
1197
+ int izero=0; double dzero=0.0;
1198
+ dcopy_(&i, &dzero, &izero, (a->r.d)+i, &(a->r.ld));
1199
+ int l = n-i;
1200
+ dcopy_(&l, (a->a)+(i+i*m), &m, (a->r.d)+(i+i*(a->r.ld)), &(a->r).ld);
1201
+ int ione = 1;
1202
+ dcopy_(&i, &dzero, &izero, (a->q.d)+(i*(a->q.ld)), &ione);
1203
+ (a->q.d)[i+i*(a->q.ld)] = 1.0;
1204
+ l = m-i-1;
1205
+ dcopy_(&l, (a->a)+(i+i*m+1), &ione, (a->q.d)+(i+i*(a->q.ld)+1), &ione);
1206
+ }
1207
+ dorgqr_(&m, &m, &k, a->q.d, &(a->q.ld), a->tau, a->work, &lwork, &info);
1208
+ km_check_info(info, rb_eRuntimeError, "unexpected info value", "dorgqr");
1209
+
1210
+ return Qnil;
1211
+ }
1212
// rb_ensure cleanup for km_qr_body: write back the outputs if the LAWORK
// buffers were copies, then free temporaries in reverse allocation order
static VALUE
km_qr_ensure(VALUE data)
{
	struct km_qr_arg *a = (struct km_qr_arg *)data;

	km_copy_and_free_if_needed(a->sr, &(a->r));
	km_copy_and_free_if_needed(a->sq, &(a->q));
	ruby_xfree(a->work);
	ruby_xfree(a->tau);
	ruby_xfree(a->a);

	return Qnil;
}
1225
+ VALUE
1226
+ kmm_mat_qr_destl(VALUE self, VALUE vq, VALUE vr)
1227
+ {
1228
+ km_check_frozen(vq); km_check_frozen(vr);
1229
+ struct km_qr_arg a; memset(&a, 0, sizeof(a));
1230
+ a.sa = km_mat2smat(self); a.sq = km_mat2smat(vq), a.sr = km_mat2smat(vr);
1231
+ km_check_double(3, a.sa, a.sq, a.sr);
1232
+ km_check_finite(a.sa);
1233
+
1234
+ km_ensure(km_qr_body, (VALUE)&a, km_qr_ensure, (VALUE)&a);
1235
+
1236
+ return Qnil;
1237
+ }
1238
+ VALUE
1239
+ kmm_mat_qr(VALUE self)
1240
+ {
1241
+ SMAT *sa = km_mat2smat(self);
1242
+ VALUE vq = km_Mat(sa->m, sa->m, VT_DOUBLE);
1243
+ VALUE vr = km_Mat(sa->m, sa->n, VT_DOUBLE);
1244
+ kmm_mat_qr_destl(self, vq, vr);
1245
+ return rb_ary_new3(2, vq, vr);
1246
+ }
1247
+
1248
// construct random orthogonal matrix via QR decomposition
struct km_rand_orth_arg {
	SMAT *smat;         // output square matrix (n x n), filled in place
	VALUE random;       // Ruby Random (or compatible) object used as the source
	double *tau, *work; // Householder scalars and LAPACK workspace
	int lwork;          // NOTE(review): unused in the body; kept for layout compatibility
	LAWORK a;           // LAPACK-side view of `smat'
};
1256
+ static VALUE
1257
+ km_rand_orth_body(VALUE data)
1258
+ {
1259
+ struct km_rand_orth_arg *a = (struct km_rand_orth_arg *)data;
1260
+
1261
+ int n = a->smat->m;
1262
+ km_check_size(1, a->smat->n,n);
1263
+
1264
+ double opt; int lwork=-1, info;
1265
+ dgeqrf_(&n, &n, NULL, &n, NULL, &opt, &lwork, &info);
1266
+ km_check_info_opt(info, "dgeqrf");
1267
+ lwork = (int)opt;
1268
+
1269
+ KALLOC(a->tau, n);
1270
+ KALLOC(a->work, lwork);
1271
+
1272
+ km_fill_normal(n*n, a->a.d, a->random);
1273
+ dgeqrf_(&n, &n, a->a.d, &n, a->tau, a->work, &lwork, &info);
1274
+ km_check_info(info, rb_eRuntimeError, "unexpected info value", "dgeqrf");
1275
+ ruby_xfree(a->work);
1276
+
1277
+ lwork = -1;
1278
+ dorgqr_(&n, &n, &n, NULL, &n, NULL, &opt, &lwork, &info);
1279
+ km_check_info_opt(info, "dorgqr");
1280
+ lwork = (int)opt;
1281
+
1282
+ KALLOC(a->work, lwork);
1283
+ KALLOCn(a->a, a->smat);
1284
+
1285
+ for (int i=0; i<n; i++ ) { // clear R to prepare to call `dorgqr'
1286
+ int l = n-i; int izero = 0; double dzero = 0.0; int x=i+i*(a->a.ld);
1287
+ dcopy_(&l, &dzero, &izero, (a->a.d)+x, &(a->a.ld));
1288
+ (a->a.d)[x] = 1.0;
1289
+ }
1290
+ dorgqr_(&n, &n, &n, a->a.d, &(a->a.ld), a->tau, a->work, &lwork, &info);
1291
+ km_check_info(info, rb_eRuntimeError, "unexpected info value", "dorgqr");
1292
+ // multiply by scalar -1 with probability 1/2
1293
+ if ( rb_funcall(a->random, id_rand, 1, INT2NUM(2)) == INT2NUM(0) ) {
1294
+ int l = n*n, ione=1; double m1=-1.0;
1295
+ dscal_(&l, &m1, a->a.d, &ione);
1296
+ }
1297
+
1298
+ return Qnil;
1299
+ }
1300
// rb_ensure cleanup for km_rand_orth_body: write back the result if the
// LAWORK buffer was a copy, then free temporaries
static VALUE
km_rand_orth_ensure(VALUE data)
{
	struct km_rand_orth_arg *a = (struct km_rand_orth_arg *)data;

	km_copy_and_free_if_needed(a->smat, &(a->a));
	ruby_xfree(a->work);
	ruby_xfree(a->tau);

	return Qnil;
}
1311
+ VALUE
1312
+ kmm_mat__rand_orth(VALUE self, VALUE random)
1313
+ {
1314
+ km_check_frozen(self);
1315
+ struct km_rand_orth_arg a; memset(&a, 0, sizeof(a));
1316
+ a.smat = km_mat2smat(self);
1317
+ km_check_double(1, a.smat);
1318
+
1319
+ a.random = random;
1320
+ km_ensure(km_rand_orth_body, (VALUE)&a, km_rand_orth_ensure, (VALUE)&a);
1321
+
1322
+ return self;
1323
+ }
1324
+
1325
// compute norm balanced AA and factor DD which satisfy AA = DD \ A * DD
// `self' is the input A and arguments are the outputs
struct km_balance_arg {
	SMAT *sa, *sd, *saa; // input A (n x n), outputs DD and AA (both n x n)
	double *scale;       // dgebal's combined permutation/scaling output
	int *perm;           // decoded 0-based permutation
	LAWORK a, d;         // LAPACK-side views of AA and DD
};
1334
// rb_ensure body: run dgebal on AA and reconstruct the explicit balancing
// factor DD from dgebal's packed scale/permutation output.
// NOTE(review): `a->a' wraps the OUTPUT matrix `saa' and dgebal_ runs on its
// current contents — no copy of the input `sa' into `saa' is visible in this
// function; verify the caller copies A into vaa first, otherwise dgebal
// balances uninitialized data. TODO confirm against callers.
static VALUE
km_balance_body(VALUE data)
{
	struct km_balance_arg *a = (struct km_balance_arg *)data;

	int n = a->sa->m;
	km_check_size(5, a->sa->n,n, a->sd->m,n, a->sd->n,n, a->saa->m,n, a->saa->n,n);

	KALLOC(a->scale, n);
	KALLOC(a->perm, n);
	KALLOCn(a->a, a->saa);
	KALLOCz(a->d, a->sd); // DD starts zeroed; one entry per column is set below
	for ( int i=0; i<n; i++ ) {
		(a->perm)[i] = i;
	}

	int ilo, ihi, info;
	char job[] = "B"; // both permute and scale
	dgebal_(job, &n, a->a.d, &(a->a.ld), &ilo, &ihi, a->scale, &info);
	km_check_info(info, rb_eRuntimeError, "unexpected info value", "dgebal");
	// decode dgebal's permutation: for i < ilo-1 and i >= ihi-1 (0-based),
	// scale[i] holds the 1-based index of the row/column exchanged with i;
	// the exchanges must be replayed in this order (see dgebal docs)
	for ( int i=ilo-2; 0 <= i; i-- ) {
		SWAP(int, (a->perm)[i], (a->perm)[(int)((a->scale)[i])-1]);
	}
	for ( int i=n-1; ihi <= i; i-- ) {
		SWAP(int, (a->perm)[i], (a->perm)[(int)((a->scale)[i])-1]);
	}
	// build DD column by column: permutation rows get 1.0, the balanced
	// block [ilo-1, ihi) gets dgebal's diagonal scaling factor
	for ( int i=0; i<n; i++ ) {
		if ( i < ilo-1 || ihi <= i ) { // permutation
			(a->d.d)[(a->perm)[i]+i*(a->d.ld)] = 1.0;
		} else {
			(a->d.d)[(a->perm)[i]+i*(a->d.ld)] = (a->scale)[i];
		}
	}

	return Qnil;
}
1370
// rb_ensure cleanup for km_balance_body: write back the outputs if the
// LAWORK buffers were copies, then free temporaries
static VALUE
km_balance_ensure(VALUE data)
{
	struct km_balance_arg *a = (struct km_balance_arg *)data;

	km_copy_and_free_if_needed(a->sd, &(a->d));
	km_copy_and_free_if_needed(a->saa, &(a->a));
	ruby_xfree(a->perm);
	ruby_xfree(a->scale);

	return Qnil;
}
1382
+ VALUE
1383
+ kmm_mat_balance_destl(VALUE self, VALUE vd, VALUE vaa)
1384
+ {
1385
+ km_check_frozen(vd); km_check_frozen(vaa);
1386
+ struct km_balance_arg a; memset(&a, 0, sizeof(a));
1387
+ a.sa = km_mat2smat(self); a.sd = km_mat2smat(vd); a.saa = km_mat2smat(vaa);
1388
+ km_check_double(3, a.sa, a.sd, a.saa);
1389
+ km_check_finite(a.sa);
1390
+
1391
+ km_ensure(km_balance_body, (VALUE)&a, km_balance_ensure, (VALUE)&a);
1392
+
1393
+ return Qnil;
1394
+ }
1395
+ VALUE
1396
+ kmm_mat_balance(VALUE self)
1397
+ {
1398
+ int n = km_mat2smat(self)->m;
1399
+ VALUE vd = km_Mat(n, n, VT_DOUBLE);
1400
+ VALUE vaa = km_Mat(n, n, VT_DOUBLE);
1401
+ kmm_mat_balance_destl(self, vd, vaa);
1402
+ return rb_ary_new3(2, vd, vaa);
1403
+ }
1404
+
1405
// compute matrix exponential of `self' and replace `self' by the result
// this method is a manual translation of GNU Octave's expm.m
struct km_expm_arg {
	SMAT *sa;                            // input/output matrix A (n x n)
	LAWORK a;                            // LAPACK-side view of `sa'
	double *scale;                       // dgebal scaling/permutation output
	double *a2, *x, *y, *foo;            // n*n scratch matrices for the Pade recurrence
	double *r, *c, *ferr, *berr, *work;  // dgesvx equilibration/error/workspace arrays
	int *ipiv, *iwork;                   // dgesvx pivot and integer workspace
	VALUE self;                          // the Ruby object, needed for the final scalar multiply
};
1416
// rb_ensure body: expm(A) via trace reduction, dgebal balancing,
// scaling-and-squaring with a [7/7]-style Pade approximant, and the inverse
// transformations — a manual translation of GNU Octave's expm.m.
// `data' is a km_expm_arg; cleanup is done by km_expm_ensure.
static VALUE
km_expm_body(VALUE data)
{
	struct km_expm_arg *a = (struct km_expm_arg *)data;

	int n = a->sa->m; int n2 = n*n;
	km_check_size(1, a->sa->n,n); // `self' must be square

	// trace reduction
	double neg_max = -DBL_MAX;
	KALLOCn(a->a, a->sa);
	// clamp entries from below at -DBL_MAX (guards against -inf leftovers)
	for ( int i=0; i<n; i++ ) { for ( int j=0; j<n; j++ ) {
		if ( (a->a.d)[i+j*(a->a.ld)] < neg_max ) { (a->a.d)[i+j*(a->a.ld)] = neg_max; }
	} }
	double trshift = 0.0;
	for ( int i=0; i<n; i++ ) {
		trshift += (a->a.d)[i+i*(a->a.ld)];
	}
	// shift the diagonal by trace/n when the trace is positive; undone at the
	// end by multiplying with exp(trshift)
	if ( 0 < trshift ) {
		trshift /= n;
		for ( int i=0; i<n; i++ ) {
			(a->a.d)[i+i*(a->a.ld)] -= trshift;
		}
	}

	// balancing
	int ilo, ihi, info;
	char job[] = "B";
	KALLOC(a->scale, n);
	dgebal_(job, &n, a->a.d, &(a->a.ld), &ilo, &ihi, a->scale, &info);
	km_check_info(info, rb_eRuntimeError, "unexpected info value", "dgebal");

	// scaling: pick s so that norm(A)/2^s <= 1 (s clamped to [0, 1023])
	int ione=1;
	double s = 0.0;
	for ( int i=0; i<n; i++ ) {
		double foo = dasum_(&n, (a->a.d)+i, &(a->a.ld)); // abs-sum of row i
		if ( s < foo ) { s = foo; }
	}
	s = logb(s);
	if ( s < 0.0 ) {
		s = 0.0;
	} else if ( 1023.0 < s ) {
		s = 1023.0;
	}
	double ps = exp2(-s);
	for ( int i=0; i<n; i++ ) {
		dscal_(&n, &ps, (a->a.d)+(i*(a->a.ld)), &ione); // A *= 2^-s, column by column
	}

	// Pade approximation (coefficients as in Octave's expm.m)
	static const double c[] = { 5.0000000000000000e-1, 1.1666666666666667e-1, 1.6666666666666667e-2, 1.6025641025641026e-3,
		1.0683760683760684e-4, 4.8562548562548563e-6, 1.3875013875013875e-7, 1.9270852604185938e-9 };
	int np1 = n+1; // stride that walks the diagonal of an n x n column-major matrix
	KALLOC(a->a2, n2);
	KALLOC(a->x, n2);
	KALLOC(a->y, n2);
	KALLOC(a->foo, n2);
	#define MPROD(_r, _a, _b) km_dmprod(n, n, n, _a, _b, _r)
	#define A a->a.d, a->a.ld
	#define A2 a->a2, n
	#define X a->x, n
	#define Y a->y, n
	#define FOO a->foo, n
	MPROD(A2, A, A); // a2 = a*a
	memcpy( a->foo, a->a2, sizeof(double)*((size_t)n2) ); // foo = a2
	dscal_( &n2, c+7, a->foo, &ione ); // foo *= c[7]
	for ( int i=0; i<n; i++ ) { a->foo[i*np1] += c[5]; } // foo += c[5]*I
	MPROD(X, FOO, A2); // x = foo*a2 = a^4*c[7]+a^2*c[5]
	for ( int i=0; i<n; i++ ) { a->x[i*np1] += c[3]; } // x += c[3]*I
	MPROD(FOO, X, A2); // foo = x*a2 = a^6*c[7]+a^4*c[5]+a^2*c[3]
	for ( int i=0; i<n; i++ ) { a->foo[i*np1] += c[1]; } // foo += c[1]*I
	MPROD(X, FOO, A2); // x = foo*a2 = a^8*c[7]+a^6*c[5]+a^4*c[3]+a^2*c[1]
	for ( int i=0; i<n; i++ ) { a->x[i*np1] += 1.0; } // x += I
	memcpy( a->foo, a->a2, sizeof(double)*((size_t)n2) ); // foo = a2
	dscal_(&n2, c+6, a->foo, &ione); // foo *= c[6]
	for ( int i=0; i<n; i++ ) { a->foo[i*np1] += c[4]; } // foo += c[4]*I
	MPROD(Y, FOO, A2); // y = foo*a2 = a^4*c[6]+a^2*c[4]
	for ( int i=0; i<n; i++ ) { a->y[i*np1] += c[2]; } // y += c[2]*I
	MPROD(FOO, Y, A2); // foo = y*a2 = a^6*c[6]+a^4*c[4]+a^2*c[2]
	for ( int i=0; i<n; i++ ) { a->foo[i*np1] += c[0]; } // foo += c[0]*I
	MPROD(Y, FOO, A); // y = foo*a = a^7*c[6]+a^5*c[4]+a^3*c[2]+a*c[0]
	double alp=1.0;
	daxpy_(&n2, &alp, a->y, &ione, a->x, &ione); // x = X+Y
	alp = -2.0;
	dscal_(&n2, &alp, a->y, &ione);
	alp = 1.0;
	daxpy_(&n2, &alp, a->x, &ione, a->y, &ione); // y = -2y+x i.e. -2Y+(X+Y) = X-Y

	// r = y\x i.e. (X-Y)\(X+Y), solved with equilibrated dgesvx;
	// the solution lands in a->a.d (alp receives rcond as scratch output)
	KALLOC(a->ipiv, n);
	KALLOC(a->r, n);
	KALLOC(a->c, n);
	KALLOC(a->ferr, n);
	KALLOC(a->berr, n);
	KALLOC(a->work, 4*n);
	KALLOC(a->iwork, n);
	char equed[2];
	char fact[]="E";
	char trans[]="N";
	dgesvx_(fact, trans, &n, &n, a->y, &n, a->a2, &n, a->ipiv, equed, a->r, a->c, a->x, &n,
		a->a.d, &(a->a.ld), &alp, a->ferr, a->berr, a->work, a->iwork, &info);
	km_check_info(info, km_eUncomp, "an internal matrix is singular or near singular", "dgesvx");

	// undo scaling by repeated squaring
	int is = (int)s;
	if ( is & 1 ) { // if is is odd, then r = r^2
		MPROD(FOO, A, A);
		dlacpy_(job, &n, &n, a->foo, &n, a->a.d, &(a->a.ld));
	}
	for ( int i=0; i<is/2; i++ ) { // each iteration raises r to the 4th power
		MPROD(FOO, A, A);
		MPROD(A, FOO, FOO);
	}

	// inverse balancing
	km_copy_and_free_if_needed(a->sa, &(a->a));
	km_unbal(a->sa, a->scale, ilo, ihi);

	// inverse trace reduction
	if ( 0 < trshift ) {
		kmm_mat_s_mul_destl(a->self, rb_float_new(exp(trshift)));
	}

	return Qnil;
}
1542
// rb_ensure cleanup for km_expm_body: free every temporary in reverse
// allocation order. km_free_if_needed (not copy-and-free) is used for the
// LAWORK view because the body already wrote the result back on success.
static VALUE
km_expm_ensure(VALUE data)
{
	struct km_expm_arg *a = (struct km_expm_arg *)data;

	ruby_xfree(a->iwork);
	ruby_xfree(a->work);
	ruby_xfree(a->berr);
	ruby_xfree(a->ferr);
	ruby_xfree(a->c);
	ruby_xfree(a->r);
	ruby_xfree(a->ipiv);

	ruby_xfree(a->foo);
	ruby_xfree(a->y);
	ruby_xfree(a->x);
	ruby_xfree(a->a2);
	km_free_if_needed(&(a->a));
	ruby_xfree(a->scale);

	return Qnil;
}
1564
+ VALUE
1565
+ kmm_mat_expm_dest(VALUE self)
1566
+ {
1567
+ km_check_frozen(self);
1568
+ struct km_expm_arg a; memset(&a, 0, sizeof(a));
1569
+ a.sa = km_mat2smat(self);
1570
+ km_check_double(1, a.sa);
1571
+ km_check_finite(a.sa);
1572
+
1573
+ a.self = self;
1574
+ km_ensure(km_expm_body, (VALUE)&a, km_expm_ensure, (VALUE)&a);
1575
+
1576
+ return self;
1577
+ }
1578
+
1579
// compute a Cholesky decomposition A = U' * U
// on entry, `self' is A. on exit, `self' is U
struct km_chol_arg {
	SMAT *sa; // input/output matrix A -> U (n x n)
	LAWORK a; // LAPACK-side view of `sa'
};
1585
// rb_ensure body: factor A = U'*U in place with dpotrf, then zero the
// strictly-lower triangle (dpotrf leaves it untouched) so the result is a
// clean upper-triangular U. `data' is a km_chol_arg.
static VALUE
km_chol_body(VALUE data)
{
	struct km_chol_arg *a = (struct km_chol_arg *)data;

	int n = a->sa->m, info;

	KALLOCn(a->a, a->sa);
	char uplo[] = "U"; // store the upper factor
	dpotrf_(uplo, &n, a->a.d, &(a->a.ld), &info);
	km_check_info(info, km_eUncomp, "self is not positive definite", "dpotrf");
	// zero column i below the diagonal (length n-i-1, contiguous in memory)
	int ione=1, izero=0; double dzero=0.0;
	for ( int i=0; i<n-1; i++ ) {
		const int len=n-i-1;
		dcopy_(&len, &dzero, &izero, (a->a.d)+(i+i*(a->a.ld)+1), &ione);
	}

	return Qnil;
}
1604
// rb_ensure cleanup for km_chol_body: write the factor back to `sa' if the
// LAWORK buffer was a copy
static VALUE
km_chol_ensure(VALUE data)
{
	struct km_chol_arg *a = (struct km_chol_arg *)data;

	km_copy_and_free_if_needed(a->sa, &(a->a));

	return Qnil;
}
1613
// Mat#chol!: replace `self' (symmetric, double, finite) by its upper
// Cholesky factor U with A = U'*U.
// returns `self'; raises km_eUncomp if `self' is not symmetric or not
// positive definite, and the usual errors if frozen/non-double/non-finite
VALUE
kmm_mat_chol_dest(VALUE self)
{
	km_check_frozen(self);
	if ( !kmm_mat_symmetry_p(0, NULL, self) ) {
		rb_raise(km_eUncomp, "self must be a symmetry matrix");
	}
	struct km_chol_arg a; memset(&a, 0, sizeof(a));
	a.sa = km_mat2smat(self);
	km_check_double(1, a.sa);
	km_check_finite(a.sa);

	km_ensure(km_chol_body, (VALUE)&a, km_chol_ensure, (VALUE)&a);

	return self;
}
1629
+