nmatrix 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/History.txt +31 -3
- data/Manifest.txt +5 -0
- data/README.rdoc +29 -27
- data/ext/nmatrix/binary_format.txt +53 -0
- data/ext/nmatrix/data/data.cpp +18 -18
- data/ext/nmatrix/data/data.h +38 -7
- data/ext/nmatrix/data/rational.h +13 -0
- data/ext/nmatrix/data/ruby_object.h +10 -0
- data/ext/nmatrix/extconf.rb +2 -0
- data/ext/nmatrix/nmatrix.cpp +655 -103
- data/ext/nmatrix/nmatrix.h +26 -14
- data/ext/nmatrix/ruby_constants.cpp +4 -0
- data/ext/nmatrix/ruby_constants.h +2 -0
- data/ext/nmatrix/storage/dense.cpp +99 -41
- data/ext/nmatrix/storage/dense.h +3 -3
- data/ext/nmatrix/storage/list.cpp +36 -14
- data/ext/nmatrix/storage/list.h +4 -4
- data/ext/nmatrix/storage/storage.cpp +19 -19
- data/ext/nmatrix/storage/storage.h +11 -11
- data/ext/nmatrix/storage/yale.cpp +17 -20
- data/ext/nmatrix/storage/yale.h +13 -11
- data/ext/nmatrix/util/io.cpp +25 -23
- data/ext/nmatrix/util/io.h +5 -5
- data/ext/nmatrix/util/math.cpp +634 -17
- data/ext/nmatrix/util/math.h +958 -9
- data/ext/nmatrix/util/sl_list.cpp +7 -7
- data/ext/nmatrix/util/sl_list.h +2 -2
- data/lib/nmatrix.rb +9 -0
- data/lib/nmatrix/blas.rb +4 -4
- data/lib/nmatrix/io/market.rb +227 -0
- data/lib/nmatrix/io/mat_reader.rb +7 -7
- data/lib/nmatrix/lapack.rb +80 -0
- data/lib/nmatrix/nmatrix.rb +78 -52
- data/lib/nmatrix/shortcuts.rb +486 -0
- data/lib/nmatrix/version.rb +1 -1
- data/spec/2x2_dense_double.mat +0 -0
- data/spec/blas_spec.rb +59 -9
- data/spec/elementwise_spec.rb +25 -12
- data/spec/io_spec.rb +69 -1
- data/spec/lapack_spec.rb +53 -4
- data/spec/math_spec.rb +9 -0
- data/spec/nmatrix_list_spec.rb +95 -0
- data/spec/nmatrix_spec.rb +10 -53
- data/spec/nmatrix_yale_spec.rb +17 -15
- data/spec/shortcuts_spec.rb +154 -0
- metadata +22 -15
data/ext/nmatrix/util/io.h
CHANGED
@@ -73,11 +73,11 @@ extern "C" {
|
|
73
73
|
/*
|
74
74
|
* C accessors.
|
75
75
|
*/
|
76
|
-
dtype_t nm_dtype_from_rbsymbol(VALUE sym);
|
77
|
-
dtype_t nm_dtype_from_rbstring(VALUE str);
|
78
|
-
stype_t nm_stype_from_rbsymbol(VALUE sym);
|
79
|
-
stype_t nm_stype_from_rbstring(VALUE str);
|
80
|
-
itype_t nm_itype_from_rbsymbol(VALUE sym);
|
76
|
+
nm::dtype_t nm_dtype_from_rbsymbol(VALUE sym);
|
77
|
+
nm::dtype_t nm_dtype_from_rbstring(VALUE str);
|
78
|
+
nm::stype_t nm_stype_from_rbsymbol(VALUE sym);
|
79
|
+
nm::stype_t nm_stype_from_rbstring(VALUE str);
|
80
|
+
nm::itype_t nm_itype_from_rbsymbol(VALUE sym);
|
81
81
|
|
82
82
|
void nm_init_io(void);
|
83
83
|
|
data/ext/nmatrix/util/math.cpp
CHANGED
@@ -127,18 +127,31 @@ extern "C" {
|
|
127
127
|
#include <clapack.h>
|
128
128
|
#endif
|
129
129
|
|
130
|
+
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
|
131
|
+
static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
|
132
|
+
|
130
133
|
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
|
131
134
|
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
|
132
|
-
|
133
135
|
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
134
136
|
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
135
|
-
|
136
137
|
static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
137
138
|
VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
139
|
+
static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
140
|
+
VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
141
|
+
static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
|
142
|
+
VALUE lda, VALUE beta, VALUE c, VALUE ldc);
|
143
|
+
static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
|
144
|
+
VALUE lda, VALUE beta, VALUE c, VALUE ldc);
|
138
145
|
|
139
146
|
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
|
140
|
-
|
147
|
+
static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
148
|
+
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
|
149
|
+
static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
150
|
+
static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
|
151
|
+
static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
152
|
+
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
|
141
153
|
static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
154
|
+
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
142
155
|
|
143
156
|
} // end of extern "C" block
|
144
157
|
|
@@ -238,6 +251,37 @@ inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SID
|
|
238
251
|
}
|
239
252
|
|
240
253
|
|
254
|
+
/*
|
255
|
+
* Function signature conversion for calling CBLAS' trmm functions as directly as possible.
|
256
|
+
*
|
257
|
+
* For documentation: http://www.netlib.org/blas/dtrmm.f
|
258
|
+
*/
|
259
|
+
template <typename DType>
|
260
|
+
inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
261
|
+
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
|
262
|
+
const void* A, const int lda, void* B, const int ldb)
|
263
|
+
{
|
264
|
+
trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
|
265
|
+
reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
|
266
|
+
}
|
267
|
+
|
268
|
+
|
269
|
+
/*
|
270
|
+
* Function signature conversion for calling CBLAS' syrk functions as directly as possible.
|
271
|
+
*
|
272
|
+
* For documentation: http://www.netlib.org/blas/dsyrk.f
|
273
|
+
*/
|
274
|
+
template <typename DType>
|
275
|
+
inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
|
276
|
+
const int n, const int k, const void* alpha,
|
277
|
+
const void* A, const int lda, const void* beta, void* C, const int ldc)
|
278
|
+
{
|
279
|
+
syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
|
280
|
+
reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
|
281
|
+
}
|
282
|
+
|
283
|
+
|
284
|
+
|
241
285
|
|
242
286
|
}} // end of namespace nm::math
|
243
287
|
|
@@ -252,13 +296,26 @@ void nm_math_init_blas() {
|
|
252
296
|
cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
|
253
297
|
|
254
298
|
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
255
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
|
299
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
|
300
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
301
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
|
302
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
|
303
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
|
304
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
|
305
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
|
306
|
+
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
|
256
307
|
|
257
308
|
cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
|
258
309
|
|
310
|
+
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
|
311
|
+
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
|
312
|
+
|
259
313
|
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
|
260
314
|
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
|
261
315
|
rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
|
316
|
+
rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
|
317
|
+
rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
|
318
|
+
rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
|
262
319
|
}
|
263
320
|
|
264
321
|
|
@@ -325,6 +382,139 @@ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
|
325
382
|
}
|
326
383
|
|
327
384
|
|
385
|
+
/*
|
386
|
+
* Call any of the cblas_xrotg functions as directly as possible.
|
387
|
+
*
|
388
|
+
* xROTG computes the elements of a Givens plane rotation matrix such that:
|
389
|
+
*
|
390
|
+
* | c s | | a | | r |
|
391
|
+
* | -s c | * | b | = | 0 |
|
392
|
+
*
|
393
|
+
* where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
|
394
|
+
*
|
395
|
+
* The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
|
396
|
+
*
|
397
|
+
* This function differs from most of the other raw BLAS accessors. Instead of providing a, b, c, s as arguments, you
|
398
|
+
* should only provide a and b (the inputs), and you should provide them as a single NVector (or the first two elements
|
399
|
+
* of any dense NMatrix or NVector type, specifically).
|
400
|
+
*
|
401
|
+
* The outputs [c,s] will be returned in a Ruby Array at the end; the input NVector will also be modified in-place.
|
402
|
+
*
|
403
|
+
* If you provide rationals, be aware that there's a high probability of an error, since rotg includes a square root --
|
404
|
+
* and most rationals' square roots are irrational. You're better off converting to Float first.
|
405
|
+
*
|
406
|
+
* This function, like the other cblas_ functions, does minimal type-checking.
|
407
|
+
*/
|
408
|
+
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
409
|
+
static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
|
410
|
+
NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
|
411
|
+
nm::math::cblas_rotg<float>,
|
412
|
+
nm::math::cblas_rotg<double>,
|
413
|
+
nm::math::cblas_rotg<nm::Complex64>,
|
414
|
+
nm::math::cblas_rotg<nm::Complex128>,
|
415
|
+
nm::math::cblas_rotg<nm::Rational32>,
|
416
|
+
nm::math::cblas_rotg<nm::Rational64>,
|
417
|
+
nm::math::cblas_rotg<nm::Rational128>,
|
418
|
+
nm::math::cblas_rotg<nm::RubyObject>
|
419
|
+
};
|
420
|
+
|
421
|
+
nm::dtype_t dtype = NM_DTYPE(ab);
|
422
|
+
|
423
|
+
if (!ttable[dtype]) {
|
424
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
425
|
+
return Qnil;
|
426
|
+
|
427
|
+
} else {
|
428
|
+
void *pC = ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
429
|
+
*pS = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
430
|
+
|
431
|
+
// extract A and B from the NVector (first two elements)
|
432
|
+
void* pA = NM_STORAGE_DENSE(ab)->elements;
|
433
|
+
void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
|
434
|
+
// c and s are output
|
435
|
+
|
436
|
+
ttable[dtype](pA, pB, pC, pS);
|
437
|
+
|
438
|
+
VALUE result = rb_ary_new2(2);
|
439
|
+
rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
|
440
|
+
rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
|
441
|
+
|
442
|
+
return result;
|
443
|
+
}
|
444
|
+
}
|
445
|
+
|
446
|
+
|
447
|
+
/*
|
448
|
+
* Call any of the cblas_xrot functions as directly as possible.
|
449
|
+
*
|
450
|
+
* xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
|
451
|
+
*
|
452
|
+
* It's tough to find documentation on xROT. Here is what we think the arguments are for:
|
453
|
+
* * n :: number of elements to consider in x and y
|
454
|
+
* * x :: a vector (expects an NVector)
|
455
|
+
* * incx :: stride of x
|
456
|
+
* * y :: a vector (expects an NVector)
|
457
|
+
* * incy :: stride of y
|
458
|
+
* * c :: cosine of the angle of rotation
|
459
|
+
* * s :: sine of the angle of rotation
|
460
|
+
*
|
461
|
+
* Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
|
462
|
+
* will be float for Complex64 or double for Complex128.
|
463
|
+
*
|
464
|
+
* You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
|
465
|
+
* with its arguments?
|
466
|
+
*
|
467
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
468
|
+
* handling, so you can easily crash Ruby!
|
469
|
+
*/
|
470
|
+
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
|
471
|
+
static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
|
472
|
+
NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
|
473
|
+
nm::math::cblas_rot<float,float>,
|
474
|
+
nm::math::cblas_rot<double,double>,
|
475
|
+
nm::math::cblas_rot<nm::Complex64,float>,
|
476
|
+
nm::math::cblas_rot<nm::Complex128,double>,
|
477
|
+
nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
|
478
|
+
nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
|
479
|
+
nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
|
480
|
+
nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
|
481
|
+
};
|
482
|
+
|
483
|
+
nm::dtype_t dtype = NM_DTYPE(x);
|
484
|
+
|
485
|
+
|
486
|
+
if (!ttable[dtype]) {
|
487
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
488
|
+
return Qfalse;
|
489
|
+
} else {
|
490
|
+
void *pC, *pS;
|
491
|
+
|
492
|
+
// We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
|
493
|
+
if (dtype == nm::COMPLEX64) {
|
494
|
+
pC = ALLOCA_N(float,1);
|
495
|
+
pS = ALLOCA_N(float,1);
|
496
|
+
rubyval_to_cval(c, nm::FLOAT32, pC);
|
497
|
+
rubyval_to_cval(s, nm::FLOAT32, pS);
|
498
|
+
} else if (dtype == nm::COMPLEX128) {
|
499
|
+
pC = ALLOCA_N(double,1);
|
500
|
+
pS = ALLOCA_N(double,1);
|
501
|
+
rubyval_to_cval(c, nm::FLOAT64, pC);
|
502
|
+
rubyval_to_cval(s, nm::FLOAT64, pS);
|
503
|
+
} else {
|
504
|
+
pC = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
505
|
+
pS = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
506
|
+
rubyval_to_cval(c, dtype, pC);
|
507
|
+
rubyval_to_cval(s, dtype, pS);
|
508
|
+
}
|
509
|
+
|
510
|
+
|
511
|
+
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
|
512
|
+
|
513
|
+
return Qtrue;
|
514
|
+
}
|
515
|
+
}
|
516
|
+
|
517
|
+
|
328
518
|
/* Call any of the cblas_xgemm functions as directly as possible.
|
329
519
|
*
|
330
520
|
* The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
|
@@ -340,7 +530,7 @@ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
|
340
530
|
* == Arguments
|
341
531
|
* See: http://www.netlib.org/blas/dgemm.f
|
342
532
|
*
|
343
|
-
* You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible
|
533
|
+
* You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
|
344
534
|
* with its arguments?
|
345
535
|
*
|
346
536
|
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
@@ -358,7 +548,7 @@ static VALUE nm_cblas_gemm(VALUE self,
|
|
358
548
|
{
|
359
549
|
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
|
360
550
|
|
361
|
-
dtype_t dtype = NM_DTYPE(a);
|
551
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
362
552
|
|
363
553
|
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
364
554
|
*pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
@@ -403,7 +593,7 @@ static VALUE nm_cblas_gemv(VALUE self,
|
|
403
593
|
{
|
404
594
|
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE trans_a, int m, int n, void* alpha, void* a, int lda, void* x, int incx, void* beta, void* y, int incy);
|
405
595
|
|
406
|
-
dtype_t dtype = NM_DTYPE(a);
|
596
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
407
597
|
|
408
598
|
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
409
599
|
*pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
@@ -425,7 +615,7 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
425
615
|
{
|
426
616
|
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
|
427
617
|
const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
|
428
|
-
const int, const int, const void* alpha, const void* a,
|
618
|
+
const int m, const int n, const void* alpha, const void* a,
|
429
619
|
const int lda, void* b, const int ldb) = {
|
430
620
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
431
621
|
nm::math::cblas_trsm<float>,
|
@@ -437,12 +627,121 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
437
627
|
nm::math::cblas_trsm<nm::RubyObject>
|
438
628
|
};
|
439
629
|
|
440
|
-
dtype_t dtype = NM_DTYPE(a);
|
630
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
441
631
|
|
442
|
-
|
443
|
-
|
632
|
+
if (!ttable[dtype]) {
|
633
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
634
|
+
} else {
|
635
|
+
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
636
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
637
|
+
|
638
|
+
ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
639
|
+
}
|
640
|
+
|
641
|
+
return Qtrue;
|
642
|
+
}
|
643
|
+
|
644
|
+
|
645
|
+
static VALUE nm_cblas_trmm(VALUE self,
|
646
|
+
VALUE order,
|
647
|
+
VALUE side, VALUE uplo,
|
648
|
+
VALUE trans_a, VALUE diag,
|
649
|
+
VALUE m, VALUE n,
|
650
|
+
VALUE alpha,
|
651
|
+
VALUE a, VALUE lda,
|
652
|
+
VALUE b, VALUE ldb)
|
653
|
+
{
|
654
|
+
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
|
655
|
+
const enum CBLAS_SIDE, const enum CBLAS_UPLO,
|
656
|
+
const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
|
657
|
+
const int m, const int n, const void* alpha, const void* a,
|
658
|
+
const int lda, void* b, const int ldb) = {
|
659
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
660
|
+
nm::math::cblas_trmm<float>,
|
661
|
+
nm::math::cblas_trmm<double>,
|
662
|
+
cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
|
663
|
+
/*
|
664
|
+
nm::math::cblas_trmm<nm::Rational32>,
|
665
|
+
nm::math::cblas_trmm<nm::Rational64>,
|
666
|
+
nm::math::cblas_trmm<nm::Rational128>,
|
667
|
+
nm::math::cblas_trmm<nm::RubyObject>*/
|
668
|
+
};
|
669
|
+
|
670
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
671
|
+
|
672
|
+
if (!ttable[dtype]) {
|
673
|
+
rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
|
674
|
+
} else {
|
675
|
+
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
676
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
677
|
+
|
678
|
+
ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
679
|
+
}
|
680
|
+
|
681
|
+
return b;
|
682
|
+
}
|
683
|
+
|
684
|
+
|
685
|
+
static VALUE nm_cblas_syrk(VALUE self,
|
686
|
+
VALUE order,
|
687
|
+
VALUE uplo,
|
688
|
+
VALUE trans,
|
689
|
+
VALUE n, VALUE k,
|
690
|
+
VALUE alpha,
|
691
|
+
VALUE a, VALUE lda,
|
692
|
+
VALUE beta,
|
693
|
+
VALUE c, VALUE ldc)
|
694
|
+
{
|
695
|
+
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
|
696
|
+
const int n, const int k, const void* alpha, const void* a,
|
697
|
+
const int lda, const void* beta, void* c, const int ldc) = {
|
698
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
699
|
+
nm::math::cblas_syrk<float>,
|
700
|
+
nm::math::cblas_syrk<double>,
|
701
|
+
cblas_csyrk, cblas_zsyrk// call directly, same function signature!
|
702
|
+
/*nm::math::cblas_trsm<nm::Rational32>,
|
703
|
+
nm::math::cblas_trsm<nm::Rational64>,
|
704
|
+
nm::math::cblas_trsm<nm::Rational128>,
|
705
|
+
nm::math::cblas_trsm<nm::RubyObject>*/
|
706
|
+
};
|
707
|
+
|
708
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
709
|
+
|
710
|
+
if (!ttable[dtype]) {
|
711
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
712
|
+
} else {
|
713
|
+
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
714
|
+
*pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
715
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
716
|
+
rubyval_to_cval(beta, dtype, pBeta);
|
717
|
+
|
718
|
+
ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
719
|
+
}
|
720
|
+
|
721
|
+
return Qtrue;
|
722
|
+
}
|
723
|
+
|
724
|
+
|
725
|
+
static VALUE nm_cblas_herk(VALUE self,
|
726
|
+
VALUE order,
|
727
|
+
VALUE uplo,
|
728
|
+
VALUE trans,
|
729
|
+
VALUE n, VALUE k,
|
730
|
+
VALUE alpha,
|
731
|
+
VALUE a, VALUE lda,
|
732
|
+
VALUE beta,
|
733
|
+
VALUE c, VALUE ldc)
|
734
|
+
{
|
735
|
+
|
736
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
737
|
+
|
738
|
+
if (dtype == nm::COMPLEX64) {
|
739
|
+
cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
740
|
+
} else if (dtype == nm::COMPLEX128) {
|
741
|
+
cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
742
|
+
} else
|
743
|
+
rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
|
444
744
|
|
445
|
-
ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
446
745
|
|
447
746
|
return Qtrue;
|
448
747
|
}
|
@@ -454,7 +753,7 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
454
753
|
* In-place modification; returns the modified vector as well.
|
455
754
|
*/
|
456
755
|
static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
|
457
|
-
dtype_t dtype = NM_DTYPE(vector);
|
756
|
+
nm::dtype_t dtype = NM_DTYPE(vector);
|
458
757
|
|
459
758
|
void* da = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
460
759
|
rubyval_to_cval(scale, dtype, da);
|
@@ -467,7 +766,43 @@ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VAL
|
|
467
766
|
}
|
468
767
|
|
469
768
|
|
470
|
-
|
769
|
+
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
770
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
771
|
+
/*nm::math::clapack_lauum<uint8_t, false>,
|
772
|
+
nm::math::clapack_lauum<int8_t, false>,
|
773
|
+
nm::math::clapack_lauum<int16_t, false>,
|
774
|
+
nm::math::clapack_lauum<uint32_t, false>,
|
775
|
+
nm::math::clapack_lauum<uint64_t, false>,*/
|
776
|
+
NULL, NULL, NULL, NULL, NULL,
|
777
|
+
nm::math::clapack_lauum<false, float>,
|
778
|
+
nm::math::clapack_lauum<false, double>,
|
779
|
+
#ifdef HAVE_CLAPACK_H
|
780
|
+
clapack_clauum, clapack_zlauum, // call directly, same function signature!
|
781
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
782
|
+
nm::math::clapack_lauum<true, nm::Complex64>,
|
783
|
+
nm::math::clapack_lauum<true, nm::Complex128>,
|
784
|
+
#endif
|
785
|
+
/*
|
786
|
+
nm::math::clapack_lauum<nm::Rational32, false>,
|
787
|
+
nm::math::clapack_lauum<nm::Rational64, false>,
|
788
|
+
nm::math::clapack_lauum<nm::Rational128, false>,
|
789
|
+
nm::math::clapack_lauum<nm::RubyObject, false>
|
790
|
+
|
791
|
+
*/
|
792
|
+
};
|
793
|
+
|
794
|
+
if (!ttable[NM_DTYPE(a)]) {
|
795
|
+
rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
|
796
|
+
} else {
|
797
|
+
// Call either our version of lauum or the LAPACK version.
|
798
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
799
|
+
}
|
800
|
+
|
801
|
+
return a;
|
802
|
+
}
|
803
|
+
|
804
|
+
|
805
|
+
/* Call any of the clapack_xgetrf functions as directly as possible.
|
471
806
|
*
|
472
807
|
* The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
|
473
808
|
* matrix A using partial pivoting with row interchanges.
|
@@ -515,8 +850,12 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
|
|
515
850
|
size_t ipiv_size = std::min(M,N);
|
516
851
|
int* ipiv = ALLOCA_N(int, ipiv_size);
|
517
852
|
|
518
|
-
|
519
|
-
|
853
|
+
if (!ttable[NM_DTYPE(a)]) {
|
854
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
855
|
+
} else {
|
856
|
+
// Call either our version of getrf or the LAPACK version.
|
857
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
|
858
|
+
}
|
520
859
|
|
521
860
|
// Result will be stored in a. We return ipiv as an array.
|
522
861
|
VALUE ipiv_array = rb_ary_new2(ipiv_size);
|
@@ -528,10 +867,288 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
|
|
528
867
|
}
|
529
868
|
|
530
869
|
|
870
|
+
/* Call any of the clapack_xpotrf functions as directly as possible.
|
871
|
+
*
|
872
|
+
* You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
|
873
|
+
* with its arguments?
|
874
|
+
*
|
875
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
876
|
+
* handling, so you can easily crash Ruby!
|
877
|
+
*
|
878
|
+
* Returns a, the same matrix that was passed in; the routine works directly on its elements. No pivot array is involved.
|
879
|
+
*/
|
880
|
+
static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
881
|
+
#ifndef HAVE_CLAPACK_H
|
882
|
+
rb_raise(rb_eNotImpError, "potrf currently requires LAPACK");
|
883
|
+
#endif
|
884
|
+
|
885
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
886
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
887
|
+
nm::math::clapack_potrf<float>,
|
888
|
+
nm::math::clapack_potrf<double>,
|
889
|
+
#ifdef HAVE_CLAPACK_H
|
890
|
+
clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
|
891
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
892
|
+
nm::math::clapack_potrf<nm::Complex64>,
|
893
|
+
nm::math::clapack_potrf<nm::Complex128>,
|
894
|
+
#endif
|
895
|
+
NULL, NULL, NULL, NULL /*
|
896
|
+
nm::math::clapack_potrf<nm::Rational32>,
|
897
|
+
nm::math::clapack_potrf<nm::Rational64>,
|
898
|
+
nm::math::clapack_potrf<nm::Rational128>,
|
899
|
+
nm::math::clapack_potrf<nm::RubyObject> */
|
900
|
+
};
|
901
|
+
|
902
|
+
if (!ttable[NM_DTYPE(a)]) {
|
903
|
+
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
904
|
+
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
905
|
+
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
906
|
+
} else {
|
907
|
+
// Call either our version of potrf or the LAPACK version.
|
908
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
909
|
+
}
|
910
|
+
|
911
|
+
return a;
|
912
|
+
}
|
913
|
+
|
914
|
+
|
915
|
+
/*
|
916
|
+
* Call any of the clapack_xgetrs functions as directly as possible.
|
917
|
+
*/
|
918
|
+
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
|
919
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
|
920
|
+
const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
|
921
|
+
const int ldb) = {
|
922
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
923
|
+
nm::math::clapack_getrs<float>,
|
924
|
+
nm::math::clapack_getrs<double>,
|
925
|
+
#ifdef HAVE_CLAPACK_H
|
926
|
+
clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
|
927
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
928
|
+
nm::math::clapack_getrs<nm::Complex64>,
|
929
|
+
nm::math::clapack_getrs<nm::Complex128>,
|
930
|
+
#endif
|
931
|
+
nm::math::clapack_getrs<nm::Rational32>,
|
932
|
+
nm::math::clapack_getrs<nm::Rational64>,
|
933
|
+
nm::math::clapack_getrs<nm::Rational128>,
|
934
|
+
nm::math::clapack_getrs<nm::RubyObject>
|
935
|
+
};
|
936
|
+
|
937
|
+
// Allocate the C version of the pivot index array
|
938
|
+
// TODO: Allow for an NVector here also, maybe?
|
939
|
+
int* ipiv_;
|
940
|
+
if (TYPE(ipiv) != T_ARRAY) {
|
941
|
+
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
942
|
+
} else {
|
943
|
+
ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
|
944
|
+
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
945
|
+
ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
|
946
|
+
}
|
947
|
+
}
|
948
|
+
|
949
|
+
if (!ttable[NM_DTYPE(a)]) {
|
950
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
951
|
+
} else {
|
952
|
+
|
953
|
+
// Call either our version of getrs or the LAPACK version.
|
954
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
955
|
+
ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
956
|
+
}
|
957
|
+
|
958
|
+
// b is both returned and modified directly in the argument list.
|
959
|
+
return b;
|
960
|
+
}
|
961
|
+
|
962
|
+
|
963
|
+
/*
|
964
|
+
* Call any of the clapack_xpotrs functions as directly as possible.
|
965
|
+
*/
|
966
|
+
static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
|
967
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
|
968
|
+
const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
|
969
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
970
|
+
nm::math::clapack_potrs<float,false>,
|
971
|
+
nm::math::clapack_potrs<double,false>,
|
972
|
+
#ifdef HAVE_CLAPACK_H
|
973
|
+
clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
|
974
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
975
|
+
nm::math::clapack_potrs<nm::Complex64,true>,
|
976
|
+
nm::math::clapack_potrs<nm::Complex128,true>,
|
977
|
+
#endif
|
978
|
+
nm::math::clapack_potrs<nm::Rational32,false>,
|
979
|
+
nm::math::clapack_potrs<nm::Rational64,false>,
|
980
|
+
nm::math::clapack_potrs<nm::Rational128,false>,
|
981
|
+
nm::math::clapack_potrs<nm::RubyObject,false>
|
982
|
+
};
|
983
|
+
|
984
|
+
|
985
|
+
if (!ttable[NM_DTYPE(a)]) {
|
986
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
987
|
+
} else {
|
988
|
+
|
989
|
+
// Call either our version of potrs or the LAPACK version.
|
990
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
991
|
+
NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
992
|
+
}
|
993
|
+
|
994
|
+
// b is both returned and modified directly in the argument list.
|
995
|
+
return b;
|
996
|
+
}
|
997
|
+
|
998
|
+
|
999
|
+
/* Call any of the clapack_xgetri functions as directly as possible.
|
1000
|
+
*
|
1001
|
+
* You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
|
1002
|
+
* with its arguments?
|
1003
|
+
*
|
1004
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1005
|
+
* handling, so you can easily crash Ruby!
|
1006
|
+
*
|
1007
|
+
* Returns an array giving the pivot indices (normally these are argument #5).
|
1008
|
+
*/
|
1009
|
+
static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
|
1010
|
+
#ifndef HAVE_CLAPACK_H
|
1011
|
+
rb_raise(rb_eNotImpError, "getri currently requires LAPACK");
|
1012
|
+
#endif
|
1013
|
+
|
1014
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
|
1015
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1016
|
+
nm::math::clapack_getri<float>,
|
1017
|
+
nm::math::clapack_getri<double>,
|
1018
|
+
#ifdef HAVE_CLAPACK_H
|
1019
|
+
clapack_cgetri, clapack_zgetri, // call directly, same function signature!
|
1020
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1021
|
+
nm::math::clapack_getri<nm::Complex64>,
|
1022
|
+
nm::math::clapack_getri<nm::Complex128>,
|
1023
|
+
#endif
|
1024
|
+
NULL, NULL, NULL, NULL /*
|
1025
|
+
nm::math::clapack_getri<nm::Rational32>,
|
1026
|
+
nm::math::clapack_getri<nm::Rational64>,
|
1027
|
+
nm::math::clapack_getri<nm::Rational128>,
|
1028
|
+
nm::math::clapack_getri<nm::RubyObject> */
|
1029
|
+
};
|
1030
|
+
|
1031
|
+
// Allocate the C version of the pivot index array
|
1032
|
+
// TODO: Allow for an NVector here also, maybe?
|
1033
|
+
int* ipiv_;
|
1034
|
+
if (TYPE(ipiv) != T_ARRAY) {
|
1035
|
+
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
1036
|
+
} else {
|
1037
|
+
ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
|
1038
|
+
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
1039
|
+
ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
|
1040
|
+
}
|
1041
|
+
}
|
1042
|
+
|
1043
|
+
if (!ttable[NM_DTYPE(a)]) {
|
1044
|
+
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1045
|
+
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
1046
|
+
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1047
|
+
} else {
|
1048
|
+
// Call either our version of getri or the LAPACK version.
|
1049
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
|
1050
|
+
}
|
1051
|
+
|
1052
|
+
return a;
|
1053
|
+
}
|
1054
|
+
|
1055
|
+
|
1056
|
+
/* Call any of the clapack_xpotri functions as directly as possible.
|
1057
|
+
*
|
1058
|
+
* You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
|
1059
|
+
* with its arguments?
|
1060
|
+
*
|
1061
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1062
|
+
* handling, so you can easily crash Ruby!
|
1063
|
+
*
|
1064
|
+
* Returns an array giving the pivot indices (normally these are argument #5).
|
1065
|
+
*/
|
1066
|
+
static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1067
|
+
#ifndef HAVE_CLAPACK_H
|
1068
|
+
rb_raise(rb_eNotImpError, "getri currently requires LAPACK");
|
1069
|
+
#endif
|
1070
|
+
|
1071
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1072
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1073
|
+
nm::math::clapack_potri<float>,
|
1074
|
+
nm::math::clapack_potri<double>,
|
1075
|
+
#ifdef HAVE_CLAPACK_H
|
1076
|
+
clapack_cpotri, clapack_zpotri, // call directly, same function signature!
|
1077
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1078
|
+
nm::math::clapack_potri<nm::Complex64>,
|
1079
|
+
nm::math::clapack_potri<nm::Complex128>,
|
1080
|
+
#endif
|
1081
|
+
NULL, NULL, NULL, NULL /*
|
1082
|
+
nm::math::clapack_getri<nm::Rational32>,
|
1083
|
+
nm::math::clapack_getri<nm::Rational64>,
|
1084
|
+
nm::math::clapack_getri<nm::Rational128>,
|
1085
|
+
nm::math::clapack_getri<nm::RubyObject> */
|
1086
|
+
};
|
1087
|
+
|
1088
|
+
if (!ttable[NM_DTYPE(a)]) {
|
1089
|
+
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1090
|
+
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
1091
|
+
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1092
|
+
} else {
|
1093
|
+
// Call either our version of getri or the LAPACK version.
|
1094
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1095
|
+
}
|
1096
|
+
|
1097
|
+
return a;
|
1098
|
+
}
|
1099
|
+
|
1100
|
+
|
1101
|
+
/*
|
1102
|
+
* Call any of the clapack_xlaswp functions as directly as possible.
|
1103
|
+
*
|
1104
|
+
* Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
|
1105
|
+
* number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
|
1106
|
+
*/
|
1107
|
+
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
|
1108
|
+
static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
|
1109
|
+
nm::math::clapack_laswp<uint8_t>,
|
1110
|
+
nm::math::clapack_laswp<int8_t>,
|
1111
|
+
nm::math::clapack_laswp<int16_t>,
|
1112
|
+
nm::math::clapack_laswp<int32_t>,
|
1113
|
+
nm::math::clapack_laswp<int64_t>,
|
1114
|
+
nm::math::clapack_laswp<float>,
|
1115
|
+
nm::math::clapack_laswp<double>,
|
1116
|
+
//#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
|
1117
|
+
// clapack_claswp, clapack_zlaswp, // call directly, same function signature!
|
1118
|
+
//#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1119
|
+
nm::math::clapack_laswp<nm::Complex64>,
|
1120
|
+
nm::math::clapack_laswp<nm::Complex128>,
|
1121
|
+
//#endif
|
1122
|
+
nm::math::clapack_laswp<nm::Rational32>,
|
1123
|
+
nm::math::clapack_laswp<nm::Rational64>,
|
1124
|
+
nm::math::clapack_laswp<nm::Rational128>,
|
1125
|
+
nm::math::clapack_laswp<nm::RubyObject>
|
1126
|
+
};
|
1127
|
+
|
1128
|
+
// Allocate the C version of the pivot index array
|
1129
|
+
// TODO: Allow for an NVector here also, maybe?
|
1130
|
+
int* ipiv_;
|
1131
|
+
if (TYPE(ipiv) != T_ARRAY) {
|
1132
|
+
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
1133
|
+
} else {
|
1134
|
+
ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
|
1135
|
+
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
1136
|
+
ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
|
1137
|
+
}
|
1138
|
+
}
|
1139
|
+
|
1140
|
+
// Call either our version of laswp or the LAPACK version.
|
1141
|
+
ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
|
1142
|
+
|
1143
|
+
// a is both returned and modified directly in the argument list.
|
1144
|
+
return a;
|
1145
|
+
}
|
1146
|
+
|
1147
|
+
|
531
1148
|
/*
|
532
1149
|
* C accessor for calculating an exact determinant.
|
533
1150
|
*/
|
534
|
-
void nm_math_det_exact(const int M, const void* elements, const int lda, dtype_t dtype, void* result) {
|
1151
|
+
void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) {
|
535
1152
|
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
|
536
1153
|
|
537
1154
|
ttable[dtype](M, elements, lda, result);
|