nmatrix 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile +5 -0
- data/History.txt +97 -0
- data/Manifest.txt +34 -7
- data/README.rdoc +13 -13
- data/Rakefile +36 -26
- data/ext/nmatrix/data/data.cpp +15 -2
- data/ext/nmatrix/data/data.h +4 -0
- data/ext/nmatrix/data/ruby_object.h +5 -14
- data/ext/nmatrix/extconf.rb +3 -2
- data/ext/nmatrix/{util/math.cpp → math.cpp} +296 -6
- data/ext/nmatrix/math/asum.h +143 -0
- data/ext/nmatrix/math/geev.h +82 -0
- data/ext/nmatrix/math/gemm.h +267 -0
- data/ext/nmatrix/math/gemv.h +208 -0
- data/ext/nmatrix/math/ger.h +96 -0
- data/ext/nmatrix/math/gesdd.h +80 -0
- data/ext/nmatrix/math/gesvd.h +78 -0
- data/ext/nmatrix/math/getf2.h +86 -0
- data/ext/nmatrix/math/getrf.h +240 -0
- data/ext/nmatrix/math/getri.h +107 -0
- data/ext/nmatrix/math/getrs.h +125 -0
- data/ext/nmatrix/math/idamax.h +86 -0
- data/ext/nmatrix/{util → math}/lapack.h +60 -356
- data/ext/nmatrix/math/laswp.h +165 -0
- data/ext/nmatrix/math/long_dtype.h +52 -0
- data/ext/nmatrix/math/math.h +1154 -0
- data/ext/nmatrix/math/nrm2.h +181 -0
- data/ext/nmatrix/math/potrs.h +125 -0
- data/ext/nmatrix/math/rot.h +141 -0
- data/ext/nmatrix/math/rotg.h +115 -0
- data/ext/nmatrix/math/scal.h +73 -0
- data/ext/nmatrix/math/swap.h +73 -0
- data/ext/nmatrix/math/trsm.h +383 -0
- data/ext/nmatrix/nmatrix.cpp +176 -152
- data/ext/nmatrix/nmatrix.h +1 -2
- data/ext/nmatrix/ruby_constants.cpp +9 -4
- data/ext/nmatrix/ruby_constants.h +1 -0
- data/ext/nmatrix/storage/dense.cpp +57 -41
- data/ext/nmatrix/storage/list.cpp +52 -50
- data/ext/nmatrix/storage/storage.cpp +59 -43
- data/ext/nmatrix/storage/yale.cpp +352 -333
- data/ext/nmatrix/storage/yale.h +4 -0
- data/lib/nmatrix.rb +2 -2
- data/lib/nmatrix/blas.rb +4 -4
- data/lib/nmatrix/enumerate.rb +241 -0
- data/lib/nmatrix/lapack.rb +54 -1
- data/lib/nmatrix/math.rb +462 -0
- data/lib/nmatrix/nmatrix.rb +210 -486
- data/lib/nmatrix/nvector.rb +0 -62
- data/lib/nmatrix/rspec.rb +75 -0
- data/lib/nmatrix/shortcuts.rb +136 -108
- data/lib/nmatrix/version.rb +1 -1
- data/spec/blas_spec.rb +20 -12
- data/spec/elementwise_spec.rb +22 -13
- data/spec/io_spec.rb +1 -0
- data/spec/lapack_spec.rb +197 -0
- data/spec/nmatrix_spec.rb +39 -38
- data/spec/nvector_spec.rb +3 -9
- data/spec/rspec_monkeys.rb +29 -0
- data/spec/rspec_spec.rb +34 -0
- data/spec/shortcuts_spec.rb +14 -16
- data/spec/slice_spec.rb +242 -186
- data/spec/spec_helper.rb +19 -0
- metadata +33 -5
- data/ext/nmatrix/util/math.h +0 -2612
data/ext/nmatrix/extconf.rb
CHANGED
@@ -105,7 +105,7 @@ $srcs = [
|
|
105
105
|
'ruby_constants.cpp',
|
106
106
|
|
107
107
|
'data/data.cpp',
|
108
|
-
'
|
108
|
+
'math.cpp',
|
109
109
|
'util/sl_list.cpp',
|
110
110
|
'util/io.cpp',
|
111
111
|
'storage/common.cpp',
|
@@ -148,6 +148,7 @@ have_header("clapack.h")
|
|
148
148
|
have_header("cblas.h")
|
149
149
|
|
150
150
|
have_func("clapack_dgetrf", "clapack.h")
|
151
|
+
have_func("dgesvd_", "clapack.h")
|
151
152
|
|
152
153
|
|
153
154
|
#find_library("cblas", "cblas_dgemm")
|
@@ -158,7 +159,7 @@ have_func("cblas_dgemm", "cblas.h")
|
|
158
159
|
# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
|
159
160
|
$libs += " -llapack -lcblas -latlas "
|
160
161
|
|
161
|
-
$objs = %w{nmatrix ruby_constants data/data util/io
|
162
|
+
$objs = %w{nmatrix ruby_constants data/data util/io math util/sl_list storage/common storage/storage storage/dense storage/yale storage/list}.map { |i| i + ".o" }
|
162
163
|
|
163
164
|
#CONFIG['CXX'] = 'clang++'
|
164
165
|
CONFIG['CXX'] = 'g++'
|
@@ -112,8 +112,33 @@
|
|
112
112
|
* Project Includes
|
113
113
|
*/
|
114
114
|
|
115
|
-
#include
|
116
|
-
#include
|
115
|
+
#include <algorithm>
|
116
|
+
#include <limits>
|
117
|
+
|
118
|
+
#include "data/data.h"
|
119
|
+
#include "math/gesdd.h"
|
120
|
+
#include "math/gesvd.h"
|
121
|
+
#include "math/geev.h"
|
122
|
+
#include "math/swap.h"
|
123
|
+
#include "math/idamax.h"
|
124
|
+
#include "math/scal.h"
|
125
|
+
#include "math/ger.h"
|
126
|
+
#include "math/getf2.h"
|
127
|
+
#include "math/laswp.h"
|
128
|
+
#include "math/trsm.h"
|
129
|
+
#include "math/long_dtype.h" // for gemm.h
|
130
|
+
#include "math/gemm.h"
|
131
|
+
#include "math/gemv.h"
|
132
|
+
#include "math/asum.h"
|
133
|
+
#include "math/nrm2.h"
|
134
|
+
#include "math/getrf.h"
|
135
|
+
#include "math/getri.h"
|
136
|
+
#include "math/getrs.h"
|
137
|
+
#include "math/potrs.h"
|
138
|
+
#include "math/rot.h"
|
139
|
+
#include "math/rotg.h"
|
140
|
+
#include "math/math.h"
|
141
|
+
#include "storage/dense.h"
|
117
142
|
|
118
143
|
#include "nmatrix.h"
|
119
144
|
#include "ruby_constants.h"
|
@@ -155,6 +180,9 @@ extern "C" {
|
|
155
180
|
static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
156
181
|
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
157
182
|
|
183
|
+
static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
|
184
|
+
static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
|
185
|
+
static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
|
158
186
|
} // end of extern "C" block
|
159
187
|
|
160
188
|
////////////////////
|
@@ -191,7 +219,21 @@ void det_exact(const int M, const void* A_elements, const int lda, void* result_
|
|
191
219
|
}
|
192
220
|
|
193
221
|
|
222
|
+
/*
|
223
|
+
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
224
|
+
*/
|
225
|
+
template <typename DType, typename CType>
|
226
|
+
inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
|
227
|
+
return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
|
228
|
+
}
|
194
229
|
|
230
|
+
/*
|
231
|
+
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
232
|
+
*/
|
233
|
+
template <typename DType, typename CType>
|
234
|
+
inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
|
235
|
+
return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
|
236
|
+
}
|
195
237
|
|
196
238
|
/*
|
197
239
|
* Function signature conversion for calling CBLAS' gemm functions as directly as possible.
|
@@ -297,6 +339,7 @@ extern "C" {
|
|
297
339
|
void nm_math_init_blas() {
|
298
340
|
cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
|
299
341
|
|
342
|
+
/* ATLAS-CLAPACK Functions */
|
300
343
|
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
301
344
|
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
|
302
345
|
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
@@ -307,6 +350,11 @@ void nm_math_init_blas() {
|
|
307
350
|
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
|
308
351
|
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
|
309
352
|
|
353
|
+
/* Non-ATLAS regular LAPACK Functions called via Fortran interface */
|
354
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
|
355
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
|
356
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
|
357
|
+
|
310
358
|
cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
|
311
359
|
|
312
360
|
rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
|
@@ -322,6 +370,31 @@ void nm_math_init_blas() {
|
|
322
370
|
rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
|
323
371
|
}
|
324
372
|
|
373
|
+
/*
|
374
|
+
* Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
|
375
|
+
*
|
376
|
+
* Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
|
377
|
+
*/
|
378
|
+
static inline char lapack_svd_job_sym(VALUE op) {
|
379
|
+
if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
|
380
|
+
else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
|
381
|
+
else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
|
382
|
+
else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
|
383
|
+
else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
|
384
|
+
return 'a';
|
385
|
+
}
|
386
|
+
|
387
|
+
|
388
|
+
/*
|
389
|
+
* Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
|
390
|
+
*
|
391
|
+
* Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
|
392
|
+
*/
|
393
|
+
static inline char lapack_evd_job_sym(VALUE op) {
|
394
|
+
if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
|
395
|
+
else return 'V';
|
396
|
+
}
|
397
|
+
|
325
398
|
|
326
399
|
/* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
|
327
400
|
* into an enum recognized by cblas.
|
@@ -337,6 +410,7 @@ static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
|
|
337
410
|
return CblasNoTrans;
|
338
411
|
}
|
339
412
|
|
413
|
+
|
340
414
|
/*
|
341
415
|
* Interprets cblas argument which could be :left or :right
|
342
416
|
*
|
@@ -417,7 +491,7 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
|
417
491
|
nm::math::cblas_rotg<nm::Complex64>,
|
418
492
|
nm::math::cblas_rotg<nm::Complex128>,
|
419
493
|
NULL, NULL, NULL, // no rationals
|
420
|
-
nm::math::cblas_rotg<nm::RubyObject>
|
494
|
+
NULL //nm::math::cblas_rotg<nm::RubyObject>
|
421
495
|
};
|
422
496
|
|
423
497
|
nm::dtype_t dtype = NM_DTYPE(ab);
|
@@ -438,8 +512,14 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
|
438
512
|
ttable[dtype](pA, pB, pC, pS);
|
439
513
|
|
440
514
|
VALUE result = rb_ary_new2(2);
|
441
|
-
|
442
|
-
|
515
|
+
|
516
|
+
if (dtype == nm::RUBYOBJ) {
|
517
|
+
rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
|
518
|
+
rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
|
519
|
+
} else {
|
520
|
+
rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
|
521
|
+
rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
|
522
|
+
}
|
443
523
|
|
444
524
|
return result;
|
445
525
|
}
|
@@ -852,9 +932,219 @@ static VALUE nm_cblas_herk(VALUE self,
|
|
852
932
|
cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
853
933
|
} else
|
854
934
|
rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
|
935
|
+
return Qtrue;
|
936
|
+
}
|
855
937
|
|
856
938
|
|
857
|
-
|
939
|
+
/*
|
940
|
+
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
941
|
+
*
|
942
|
+
* xGESVD computes the singular value decomposition (SVD) of a real
|
943
|
+
* M-by-N matrix A, optionally computing the left and/or right singular
|
944
|
+
* vectors. The SVD is written
|
945
|
+
*
|
946
|
+
* A = U * SIGMA * transpose(V)
|
947
|
+
*
|
948
|
+
* where SIGMA is an M-by-N matrix which is zero except for its
|
949
|
+
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
|
950
|
+
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
|
951
|
+
* are the singular values of A; they are real and non-negative, and
|
952
|
+
* are returned in descending order. The first min(m,n) columns of
|
953
|
+
* U and V are the left and right singular vectors of A.
|
954
|
+
*
|
955
|
+
* Note that the routine returns V**T, not V.
|
956
|
+
*/
|
957
|
+
static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
|
958
|
+
static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
|
959
|
+
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
960
|
+
nm::math::lapack_gesvd<float,float>,
|
961
|
+
nm::math::lapack_gesvd<double,double>,
|
962
|
+
nm::math::lapack_gesvd<nm::Complex64,float>,
|
963
|
+
nm::math::lapack_gesvd<nm::Complex128,double>,
|
964
|
+
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
965
|
+
};
|
966
|
+
|
967
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
968
|
+
|
969
|
+
|
970
|
+
if (!gesvd_table[dtype]) {
|
971
|
+
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
972
|
+
return Qfalse;
|
973
|
+
} else {
|
974
|
+
int M = FIX2INT(m),
|
975
|
+
N = FIX2INT(n);
|
976
|
+
|
977
|
+
int min_mn = NM_MIN(M,N);
|
978
|
+
int max_mn = NM_MAX(M,N);
|
979
|
+
|
980
|
+
char JOBU = lapack_svd_job_sym(jobu),
|
981
|
+
JOBVT = lapack_svd_job_sym(jobvt);
|
982
|
+
|
983
|
+
// only need rwork for complex matrices
|
984
|
+
int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
|
985
|
+
void* rwork = rwork_size > 0 ? ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
|
986
|
+
int work_size = FIX2INT(lwork);
|
987
|
+
|
988
|
+
// ignore user argument for lwork if it's too small.
|
989
|
+
work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
|
990
|
+
void* work = ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
|
991
|
+
|
992
|
+
int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
993
|
+
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
|
994
|
+
work, work_size, rwork);
|
995
|
+
return INT2FIX(info);
|
996
|
+
}
|
997
|
+
}
|
998
|
+
|
999
|
+
/*
|
1000
|
+
* Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
|
1001
|
+
*
|
1002
|
+
* xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
|
1003
|
+
* M-by-N matrix A, optionally computing the left and/or right singular
|
1004
|
+
* vectors. The SVD is written
|
1005
|
+
*
|
1006
|
+
* A = U * SIGMA * transpose(V)
|
1007
|
+
*
|
1008
|
+
* where SIGMA is an M-by-N matrix which is zero except for its
|
1009
|
+
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
|
1010
|
+
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
|
1011
|
+
* are the singular values of A; they are real and non-negative, and
|
1012
|
+
* are returned in descending order. The first min(m,n) columns of
|
1013
|
+
* U and V are the left and right singular vectors of A.
|
1014
|
+
*
|
1015
|
+
* Note that the routine returns V**T, not V.
|
1016
|
+
*/
|
1017
|
+
static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
|
1018
|
+
static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
|
1019
|
+
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1020
|
+
nm::math::lapack_gesdd<float,float>,
|
1021
|
+
nm::math::lapack_gesdd<double,double>,
|
1022
|
+
nm::math::lapack_gesdd<nm::Complex64,float>,
|
1023
|
+
nm::math::lapack_gesdd<nm::Complex128,double>,
|
1024
|
+
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1025
|
+
};
|
1026
|
+
|
1027
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
1028
|
+
|
1029
|
+
if (!gesdd_table[dtype]) {
|
1030
|
+
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1031
|
+
return Qfalse;
|
1032
|
+
} else {
|
1033
|
+
int M = FIX2INT(m),
|
1034
|
+
N = FIX2INT(n);
|
1035
|
+
|
1036
|
+
int min_mn = NM_MIN(M,N);
|
1037
|
+
int max_mn = NM_MAX(M,N);
|
1038
|
+
|
1039
|
+
char JOBZ = lapack_svd_job_sym(jobz);
|
1040
|
+
|
1041
|
+
// only need rwork for complex matrices
|
1042
|
+
void* rwork = NULL;
|
1043
|
+
|
1044
|
+
int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
|
1045
|
+
if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
|
1046
|
+
int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
|
1047
|
+
rwork = ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
|
1048
|
+
|
1049
|
+
if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
|
1050
|
+
else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
|
1051
|
+
else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
|
1052
|
+
} else {
|
1053
|
+
if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
|
1054
|
+
else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
|
1055
|
+
else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
|
1056
|
+
}
|
1057
|
+
void* work = ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
|
1058
|
+
int* iwork = ALLOCA_N(int, 8*min_mn);
|
1059
|
+
|
1060
|
+
int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1061
|
+
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
|
1062
|
+
work, work_size, iwork, rwork);
|
1063
|
+
return INT2FIX(info);
|
1064
|
+
}
|
1065
|
+
}
|
1066
|
+
|
1067
|
+
|
1068
|
+
/*
|
1069
|
+
* Function signature conversion for calling CBLAS' geev functions as directly as possible.
|
1070
|
+
*
|
1071
|
+
* GEEV computes for an N-by-N real nonsymmetric matrix A, the
|
1072
|
+
* eigenvalues and, optionally, the left and/or right eigenvectors.
|
1073
|
+
*
|
1074
|
+
* The right eigenvector v(j) of A satisfies
|
1075
|
+
* A * v(j) = lambda(j) * v(j)
|
1076
|
+
* where lambda(j) is its eigenvalue.
|
1077
|
+
*
|
1078
|
+
* The left eigenvector u(j) of A satisfies
|
1079
|
+
* u(j)**H * A = lambda(j) * u(j)**H
|
1080
|
+
* where u(j)**H denotes the conjugate transpose of u(j).
|
1081
|
+
*
|
1082
|
+
* The computed eigenvectors are normalized to have Euclidean norm
|
1083
|
+
* equal to 1 and largest component real.
|
1084
|
+
*/
|
1085
|
+
static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
|
1086
|
+
static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
|
1087
|
+
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1088
|
+
nm::math::lapack_geev<float,float>,
|
1089
|
+
nm::math::lapack_geev<double,double>,
|
1090
|
+
nm::math::lapack_geev<nm::Complex64,float>,
|
1091
|
+
nm::math::lapack_geev<nm::Complex128,double>,
|
1092
|
+
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1093
|
+
};
|
1094
|
+
|
1095
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
1096
|
+
|
1097
|
+
|
1098
|
+
if (!geev_table[dtype]) {
|
1099
|
+
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1100
|
+
return Qfalse;
|
1101
|
+
} else {
|
1102
|
+
int N = FIX2INT(n);
|
1103
|
+
|
1104
|
+
char JOBVL = lapack_evd_job_sym(compute_left),
|
1105
|
+
JOBVR = lapack_evd_job_sym(compute_right);
|
1106
|
+
|
1107
|
+
void* A = NM_STORAGE_DENSE(a)->elements;
|
1108
|
+
void* WR = NM_STORAGE_DENSE(w)->elements;
|
1109
|
+
void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
|
1110
|
+
void* VL = NM_STORAGE_DENSE(vl)->elements;
|
1111
|
+
void* VR = NM_STORAGE_DENSE(vr)->elements;
|
1112
|
+
|
1113
|
+
// only need rwork for complex matrices (wi == Qnil for complex)
|
1114
|
+
int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
|
1115
|
+
void* rwork = rwork_size > 0 ? ALLOCA_N(char, rwork_size) : NULL;
|
1116
|
+
int work_size = FIX2INT(lwork);
|
1117
|
+
void* work;
|
1118
|
+
|
1119
|
+
int info;
|
1120
|
+
|
1121
|
+
// if work size is 0 or -1, query.
|
1122
|
+
if (work_size <= 0) {
|
1123
|
+
work_size = -1;
|
1124
|
+
work = ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
|
1125
|
+
info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
|
1126
|
+
work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
|
1127
|
+
// line above is basically: work_size = (int)(work[0]); // now have new work_size
|
1128
|
+
xfree(work);
|
1129
|
+
if (info == 0)
|
1130
|
+
rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
|
1131
|
+
else return INT2FIX(info); // error of some kind on query!
|
1132
|
+
}
|
1133
|
+
|
1134
|
+
// if work size is < 2*N, just set it to 2*N
|
1135
|
+
if (work_size < 2*N) work_size = 2*N;
|
1136
|
+
if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
|
1137
|
+
work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
|
1138
|
+
}
|
1139
|
+
|
1140
|
+
// Allocate work array for actual run
|
1141
|
+
work = ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
|
1142
|
+
|
1143
|
+
// Perform the actual calculation.
|
1144
|
+
info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
|
1145
|
+
|
1146
|
+
return INT2FIX(info);
|
1147
|
+
}
|
858
1148
|
}
|
859
1149
|
|
860
1150
|
|
@@ -0,0 +1,143 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == asum.h
|
25
|
+
//
|
26
|
+
// CBLAS asum function
|
27
|
+
//
|
28
|
+
|
29
|
+
/*
|
30
|
+
* Automatically Tuned Linear Algebra Software v3.8.4
|
31
|
+
* (C) Copyright 1999 R. Clint Whaley
|
32
|
+
*
|
33
|
+
* Redistribution and use in source and binary forms, with or without
|
34
|
+
* modification, are permitted provided that the following conditions
|
35
|
+
* are met:
|
36
|
+
* 1. Redistributions of source code must retain the above copyright
|
37
|
+
* notice, this list of conditions and the following disclaimer.
|
38
|
+
* 2. Redistributions in binary form must reproduce the above copyright
|
39
|
+
* notice, this list of conditions, and the following disclaimer in the
|
40
|
+
* documentation and/or other materials provided with the distribution.
|
41
|
+
* 3. The name of the ATLAS group or the names of its contributers may
|
42
|
+
* not be used to endorse or promote products derived from this
|
43
|
+
* software without specific written permission.
|
44
|
+
*
|
45
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
46
|
+
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
47
|
+
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
48
|
+
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
|
49
|
+
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
50
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
51
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
52
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
53
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
54
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
55
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
56
|
+
*
|
57
|
+
*/
|
58
|
+
|
59
|
+
#ifndef ASUM_H
|
60
|
+
# define ASUM_H
|
61
|
+
|
62
|
+
|
63
|
+
namespace nm { namespace math {
|
64
|
+
|
65
|
+
/*
|
66
|
+
* Level 1 BLAS routine which sums the absolute values of a vector's contents. If the vector consists of complex values,
|
67
|
+
* the routine sums the absolute values of the real and imaginary components as well.
|
68
|
+
*
|
69
|
+
* So, based on input types, these are the valid return types:
|
70
|
+
* int -> int
|
71
|
+
* float -> float or double
|
72
|
+
* double -> double
|
73
|
+
* complex64 -> float or double
|
74
|
+
* complex128 -> double
|
75
|
+
* rational -> rational
|
76
|
+
*/
|
77
|
+
template <typename ReturnDType, typename DType>
|
78
|
+
inline ReturnDType asum(const int N, const DType* X, const int incX) {
|
79
|
+
ReturnDType sum = 0;
|
80
|
+
if ((N > 0) && (incX > 0)) {
|
81
|
+
for (int i = 0; i < N; ++i) {
|
82
|
+
sum += std::abs(X[i*incX]);
|
83
|
+
}
|
84
|
+
}
|
85
|
+
return sum;
|
86
|
+
}
|
87
|
+
|
88
|
+
|
89
|
+
#ifdef HAVE_CBLAS_H
|
90
|
+
template <>
|
91
|
+
inline float asum(const int N, const float* X, const int incX) {
|
92
|
+
return cblas_sasum(N, X, incX);
|
93
|
+
}
|
94
|
+
|
95
|
+
template <>
|
96
|
+
inline double asum(const int N, const double* X, const int incX) {
|
97
|
+
return cblas_dasum(N, X, incX);
|
98
|
+
}
|
99
|
+
|
100
|
+
template <>
|
101
|
+
inline float asum(const int N, const Complex64* X, const int incX) {
|
102
|
+
return cblas_scasum(N, X, incX);
|
103
|
+
}
|
104
|
+
|
105
|
+
template <>
|
106
|
+
inline double asum(const int N, const Complex128* X, const int incX) {
|
107
|
+
return cblas_dzasum(N, X, incX);
|
108
|
+
}
|
109
|
+
#else
|
110
|
+
template <>
|
111
|
+
inline float asum(const int N, const Complex64* X, const int incX) {
|
112
|
+
float sum = 0;
|
113
|
+
if ((N > 0) && (incX > 0)) {
|
114
|
+
for (int i = 0; i < N; ++i) {
|
115
|
+
sum += std::abs(X[i*incX].r) + std::abs(X[i*incX].i);
|
116
|
+
}
|
117
|
+
}
|
118
|
+
return sum;
|
119
|
+
}
|
120
|
+
|
121
|
+
template <>
|
122
|
+
inline double asum(const int N, const Complex128* X, const int incX) {
|
123
|
+
double sum = 0;
|
124
|
+
if ((N > 0) && (incX > 0)) {
|
125
|
+
for (int i = 0; i < N; ++i) {
|
126
|
+
sum += std::abs(X[i*incX].r) + std::abs(X[i*incX].i);
|
127
|
+
}
|
128
|
+
}
|
129
|
+
return sum;
|
130
|
+
}
|
131
|
+
#endif
|
132
|
+
|
133
|
+
|
134
|
+
template <typename ReturnDType, typename DType>
|
135
|
+
inline void cblas_asum(const int N, const void* X, const int incX, void* sum) {
|
136
|
+
*reinterpret_cast<ReturnDType*>( sum ) = asum<ReturnDType, DType>( N, reinterpret_cast<const DType*>(X), incX );
|
137
|
+
}
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
}} // end of namespace nm::math
|
142
|
+
|
143
|
+
#endif // NRM2_H
|