pnmatrix 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/nmatrix/binary_format.txt +53 -0
- data/ext/nmatrix/data/complex.h +388 -0
- data/ext/nmatrix/data/data.cpp +274 -0
- data/ext/nmatrix/data/data.h +651 -0
- data/ext/nmatrix/data/meta.h +64 -0
- data/ext/nmatrix/data/ruby_object.h +386 -0
- data/ext/nmatrix/extconf.rb +70 -0
- data/ext/nmatrix/math/asum.h +99 -0
- data/ext/nmatrix/math/cblas_enums.h +36 -0
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +241 -0
- data/ext/nmatrix/math/gemv.h +178 -0
- data/ext/nmatrix/math/getrf.h +255 -0
- data/ext/nmatrix/math/getrs.h +121 -0
- data/ext/nmatrix/math/imax.h +82 -0
- data/ext/nmatrix/math/laswp.h +165 -0
- data/ext/nmatrix/math/long_dtype.h +62 -0
- data/ext/nmatrix/math/magnitude.h +54 -0
- data/ext/nmatrix/math/math.h +751 -0
- data/ext/nmatrix/math/nrm2.h +165 -0
- data/ext/nmatrix/math/rot.h +117 -0
- data/ext/nmatrix/math/rotg.h +106 -0
- data/ext/nmatrix/math/scal.h +71 -0
- data/ext/nmatrix/math/trsm.h +336 -0
- data/ext/nmatrix/math/util.h +162 -0
- data/ext/nmatrix/math.cpp +1368 -0
- data/ext/nmatrix/nm_memory.h +60 -0
- data/ext/nmatrix/nmatrix.cpp +285 -0
- data/ext/nmatrix/nmatrix.h +476 -0
- data/ext/nmatrix/ruby_constants.cpp +151 -0
- data/ext/nmatrix/ruby_constants.h +106 -0
- data/ext/nmatrix/ruby_nmatrix.c +3130 -0
- data/ext/nmatrix/storage/common.cpp +77 -0
- data/ext/nmatrix/storage/common.h +183 -0
- data/ext/nmatrix/storage/dense/dense.cpp +1096 -0
- data/ext/nmatrix/storage/dense/dense.h +129 -0
- data/ext/nmatrix/storage/list/list.cpp +1628 -0
- data/ext/nmatrix/storage/list/list.h +138 -0
- data/ext/nmatrix/storage/storage.cpp +730 -0
- data/ext/nmatrix/storage/storage.h +99 -0
- data/ext/nmatrix/storage/yale/class.h +1139 -0
- data/ext/nmatrix/storage/yale/iterators/base.h +143 -0
- data/ext/nmatrix/storage/yale/iterators/iterator.h +131 -0
- data/ext/nmatrix/storage/yale/iterators/row.h +450 -0
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +140 -0
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +169 -0
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +124 -0
- data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
- data/ext/nmatrix/storage/yale/yale.cpp +2074 -0
- data/ext/nmatrix/storage/yale/yale.h +203 -0
- data/ext/nmatrix/types.h +55 -0
- data/ext/nmatrix/util/io.cpp +279 -0
- data/ext/nmatrix/util/io.h +115 -0
- data/ext/nmatrix/util/sl_list.cpp +627 -0
- data/ext/nmatrix/util/sl_list.h +144 -0
- data/ext/nmatrix/util/util.h +78 -0
- data/lib/nmatrix/blas.rb +378 -0
- data/lib/nmatrix/cruby/math.rb +744 -0
- data/lib/nmatrix/enumerate.rb +253 -0
- data/lib/nmatrix/homogeneous.rb +241 -0
- data/lib/nmatrix/io/fortran_format.rb +138 -0
- data/lib/nmatrix/io/harwell_boeing.rb +221 -0
- data/lib/nmatrix/io/market.rb +263 -0
- data/lib/nmatrix/io/point_cloud.rb +189 -0
- data/lib/nmatrix/jruby/decomposition.rb +24 -0
- data/lib/nmatrix/jruby/enumerable.rb +13 -0
- data/lib/nmatrix/jruby/error.rb +4 -0
- data/lib/nmatrix/jruby/math.rb +501 -0
- data/lib/nmatrix/jruby/nmatrix_java.rb +840 -0
- data/lib/nmatrix/jruby/operators.rb +283 -0
- data/lib/nmatrix/jruby/slice.rb +264 -0
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +953 -0
- data/lib/nmatrix/mkmf.rb +100 -0
- data/lib/nmatrix/monkeys.rb +137 -0
- data/lib/nmatrix/nmatrix.rb +1172 -0
- data/lib/nmatrix/rspec.rb +75 -0
- data/lib/nmatrix/shortcuts.rb +1163 -0
- data/lib/nmatrix/version.rb +39 -0
- data/lib/nmatrix/yale_functions.rb +118 -0
- data/lib/nmatrix.rb +28 -0
- data/spec/00_nmatrix_spec.rb +892 -0
- data/spec/01_enum_spec.rb +196 -0
- data/spec/02_slice_spec.rb +407 -0
- data/spec/03_nmatrix_monkeys_spec.rb +80 -0
- data/spec/2x2_dense_double.mat +0 -0
- data/spec/4x4_sparse.mat +0 -0
- data/spec/4x5_dense.mat +0 -0
- data/spec/blas_spec.rb +215 -0
- data/spec/elementwise_spec.rb +311 -0
- data/spec/homogeneous_spec.rb +100 -0
- data/spec/io/fortran_format_spec.rb +88 -0
- data/spec/io/harwell_boeing_spec.rb +98 -0
- data/spec/io/test.rua +9 -0
- data/spec/io_spec.rb +159 -0
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/leakcheck.rb +16 -0
- data/spec/math_spec.rb +1363 -0
- data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
- data/spec/nmatrix_yale_spec.rb +286 -0
- data/spec/rspec_monkeys.rb +56 -0
- data/spec/rspec_spec.rb +35 -0
- data/spec/shortcuts_spec.rb +474 -0
- data/spec/slice_set_spec.rb +162 -0
- data/spec/spec_helper.rb +172 -0
- data/spec/stat_spec.rb +214 -0
- data/spec/test.pcd +20 -0
- data/spec/utm5940.mtx +83844 -0
- metadata +295 -0
|
@@ -0,0 +1,1368 @@
|
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
|
2
|
+
// = NMatrix
|
|
3
|
+
//
|
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
|
5
|
+
// NMatrix is part of SciRuby.
|
|
6
|
+
//
|
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
|
9
|
+
//
|
|
10
|
+
// == Copyright Information
|
|
11
|
+
//
|
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
|
14
|
+
//
|
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
|
16
|
+
//
|
|
17
|
+
// == Contributing
|
|
18
|
+
//
|
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
|
20
|
+
// our Contributor Agreement:
|
|
21
|
+
//
|
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
|
23
|
+
//
|
|
24
|
+
// == math.cpp
|
|
25
|
+
//
|
|
26
|
+
// Ruby-exposed CBLAS and LAPACK functions that are available without
|
|
27
|
+
// an external library.
|
|
28
|
+
//
|
|
29
|
+
// === Procedure for adding CBLAS functions to math.cpp/math.h:
|
|
30
|
+
//
|
|
31
|
+
// This procedure is written as if for a fictional function with double
|
|
32
|
+
// version dbacon, which we'll say is from CBLAS.
|
|
33
|
+
//
|
|
34
|
+
// 1. Write a default templated version which probably returns a boolean.
|
|
35
|
+
// Call it bacon, and put it in math.h.
|
|
36
|
+
//
|
|
37
|
+
// template <typename DType>
|
|
38
|
+
// bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
|
39
|
+
// rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
|
|
40
|
+
// }
|
|
41
|
+
//
|
|
42
|
+
// Make sure this is in namespace nm::math
|
|
43
|
+
//
|
|
44
|
+
// 2. In math.cpp, add a templated inline static version of the function which takes
|
|
45
|
+
// only void* pointers and uses static_cast to convert them to the
|
|
46
|
+
// proper dtype. This should also be in namespace nm::math
|
|
47
|
+
//
|
|
48
|
+
// This function may also need to switch m and n if these arguments are given.
|
|
49
|
+
//
|
|
50
|
+
// For an example, see cblas_gemm. This function should do nothing other than cast
|
|
51
|
+
// appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
|
|
52
|
+
// all take void* only, and no other pointers that vary between functions, you can skip
|
|
53
|
+
// this particular step -- as we can call them directly using a custom function pointer
|
|
54
|
+
// array (same function signature!).
|
|
55
|
+
//
|
|
56
|
+
// This version of the function will be the one exposed through NMatrix::BLAS. We
|
|
57
|
+
// want it to be as close to the actual BLAS version of the function as possible,
|
|
58
|
+
// and with as few checks as possible.
|
|
59
|
+
//
|
|
60
|
+
// You will probably need a forward declaration in the extern "C" block.
|
|
61
|
+
//
|
|
62
|
+
// Note: In that case, the function you wrote in Step 1 should also take exactly the
|
|
63
|
+
// same arguments as cblas_xbacon. Otherwise Bad Things will happen.
|
|
64
|
+
//
|
|
65
|
+
// 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
|
|
66
|
+
// call nm::math::bacon()) and also
|
|
67
|
+
// inline specialized versions of bacon for the different BLAS types.
|
|
68
|
+
// This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
|
|
69
|
+
// of bacon from whatever external library is available, as well as the internal version
|
|
70
|
+
// if an external version is not available. These functions will end up in a namespace
|
|
71
|
+
// like nm::math::atlas, but don't explicitly put them in a namespace, they will get
|
|
72
|
+
// put in the appropriate namespace when cblas_templates_core.h is included.
|
|
73
|
+
//
|
|
74
|
+
// template <typename DType>
|
|
75
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
|
76
|
+
// nm::math::bacon(trans, M, N, A, ...);
|
|
77
|
+
// }
|
|
78
|
+
//
|
|
79
|
+
// template <>
|
|
80
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
|
|
81
|
+
// cblas_sbacon(trans, M, N, A, ...);
|
|
82
|
+
// return true;
|
|
83
|
+
// }
|
|
84
|
+
//
|
|
85
|
+
// Note that you should do everything in your power here to parse any return values
|
|
86
|
+
// cblas_sbacon may give you. We're not trying very hard in this example, but you might
|
|
87
|
+
// look at getrf to see how it might be done.
|
|
88
|
+
//
|
|
89
|
+
// 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
|
|
90
|
+
// of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
|
|
91
|
+
// and math_lapacke.cpp
|
|
92
|
+
//
|
|
93
|
+
// 5. Expose the function in nm_math_init_blas(), in math.cpp:
|
|
94
|
+
//
|
|
95
|
+
// rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
|
|
96
|
+
//
|
|
97
|
+
// Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
|
|
98
|
+
// to the plugin gems.
|
|
99
|
+
//
|
|
100
|
+
// Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
|
|
101
|
+
//
|
|
102
|
+
// 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
|
|
103
|
+
// sanity checks we left out in step 2.
|
|
104
|
+
//
|
|
105
|
+
// 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
|
|
106
|
+
//
|
|
107
|
+
// 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
|
|
108
|
+
// f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
|
|
109
|
+
// inserts a lot of unnecessary stuff.
|
|
110
|
+
//
|
|
111
|
+
// Copy and paste the output into the default templated function you wrote in Step 1.
|
|
112
|
+
// Fix it so it works as a template instead of just for doubles.
|
|
113
|
+
//
|
|
114
|
+
// Because of step 3, this will automatically also work for the nmatrix-atlas
|
|
115
|
+
// and nmatrix-lapacke implementations.
|
|
116
|
+
//
|
|
117
|
+
// 9. Write tests to confirm that it works for all data types.
|
|
118
|
+
//
|
|
119
|
+
// 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
|
|
120
|
+
// or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
|
|
121
|
+
//
|
|
122
|
+
// 11. Pull request!
|
|
123
|
+
|
|
124
|
+
/*
|
|
125
|
+
* Project Includes
|
|
126
|
+
*/
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
#include <ruby.h>
|
|
130
|
+
#include <algorithm>
|
|
131
|
+
#include <limits>
|
|
132
|
+
#include <cmath>
|
|
133
|
+
|
|
134
|
+
#include "math/cblas_enums.h"
|
|
135
|
+
|
|
136
|
+
#include "data/data.h"
|
|
137
|
+
#include "math/magnitude.h"
|
|
138
|
+
#include "math/imax.h"
|
|
139
|
+
#include "math/scal.h"
|
|
140
|
+
#include "math/laswp.h"
|
|
141
|
+
#include "math/trsm.h"
|
|
142
|
+
#include "math/gemm.h"
|
|
143
|
+
#include "math/gemv.h"
|
|
144
|
+
#include "math/asum.h"
|
|
145
|
+
#include "math/nrm2.h"
|
|
146
|
+
#include "math/getrf.h"
|
|
147
|
+
#include "math/getrs.h"
|
|
148
|
+
#include "math/rot.h"
|
|
149
|
+
#include "math/rotg.h"
|
|
150
|
+
#include "math/math.h"
|
|
151
|
+
#include "math/util.h"
|
|
152
|
+
#include "storage/dense/dense.h"
|
|
153
|
+
|
|
154
|
+
#include "nmatrix.h"
|
|
155
|
+
#include "ruby_constants.h"
|
|
156
|
+
|
|
157
|
+
/*
|
|
158
|
+
* Forward Declarations
|
|
159
|
+
*/
|
|
160
|
+
|
|
161
|
+
extern "C" {
|
|
162
|
+
/* BLAS Level 1. */
|
|
163
|
+
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
|
164
|
+
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
|
|
165
|
+
static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
|
|
166
|
+
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
|
|
167
|
+
static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
|
|
168
|
+
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
|
|
169
|
+
|
|
170
|
+
/* BLAS Level 2. */
|
|
171
|
+
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
|
172
|
+
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
|
173
|
+
|
|
174
|
+
/* BLAS Level 3. */
|
|
175
|
+
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
|
|
176
|
+
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
|
|
177
|
+
static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
|
178
|
+
VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
|
179
|
+
|
|
180
|
+
/* LAPACK. */
|
|
181
|
+
static VALUE nm_has_clapack(VALUE self);
|
|
182
|
+
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
|
|
183
|
+
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
|
|
184
|
+
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
|
|
185
|
+
} // end of extern "C" block
|
|
186
|
+
|
|
187
|
+
////////////////////
|
|
188
|
+
// Math Functions //
|
|
189
|
+
////////////////////
|
|
190
|
+
|
|
191
|
+
namespace nm {
|
|
192
|
+
namespace math {
|
|
193
|
+
|
|
194
|
+
/*
|
|
195
|
+
* Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
|
|
196
|
+
*/
|
|
197
|
+
template <typename DType>
void det_exact_from_dense(const int M, const void* A_elements, const int lda, void* result_arg) {
  // Computes the exact determinant of a 2x2 or 3x3 row-major dense matrix.
  //   M           - matrix dimension (only 2 and 3 are implemented)
  //   A_elements  - matrix entries, row-major, leading dimension lda
  //   result_arg  - out-parameter receiving the determinant (a DType)
  // Raises ArgError for M < 2 and NotImpError for M > 3.
  DType* result = reinterpret_cast<DType*>(result_arg);
  const DType* A = reinterpret_cast<const DType*>(A_elements);

  // Wider accumulator type for the 3x3 intermediate products (see LongDType).
  typename LongDType<DType>::type x, y;

  if (M == 2) {
    // | a b |
    // | c d |  ->  ad - bc
    *result = A[0] * A[lda+1] - A[1] * A[lda];
  } else if (M == 3) {
    // Cofactor expansion along the first row of
    //   | a b c |
    //   | d e f |
    //   | g h i |
    x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
    y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // di - fg
    x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(di-fg)

    y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
    *result = A[2]*y + x; // + c*(dh-eg)
  } else if (M < 2) {
    rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
  } else {
    rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
  }
}
|
|
219
|
+
|
|
220
|
+
//we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive
|
|
221
|
+
template <>
void det_exact_from_dense<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
  // Determinants can be negative even when every matrix entry is positive,
  // so an unsigned (byte) result type can never represent the answer.
  // Reject the call outright rather than return a wrapped-around value.
  rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
}
|
|
225
|
+
/*
|
|
226
|
+
* Calculate the determinant for a yale matrix (storage) of size 2 or 3. Return the result.
|
|
227
|
+
*/
|
|
228
|
+
template <typename DType>
void det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, void* result_arg) {
  // Computes the exact determinant of a 2x2 or 3x3 Yale (sparse) matrix.
  //   M          - matrix dimension (only 2 and 3 are implemented)
  //   storage    - Yale storage: a[0..shape[0]-1] holds the diagonal and
  //                off-diagonal values start at index shape[0]+1, with ija
  //                giving their column indices (as used by the code below)
  //   lda        - unused here (kept for signature parity with the dense version)
  //   result_arg - out-parameter receiving the determinant (a DType)
  DType* result = reinterpret_cast<DType*>(result_arg);
  IType* ija = reinterpret_cast<IType *>(storage->ija);
  DType* a = reinterpret_cast<DType*>(storage->a);
  // First off-diagonal slot in the a/ija arrays.
  IType col_pos = storage->shape[0] + 1;
  if (M == 2) {
    // ija[2] - ija[0] counts the stored off-diagonal entries. Only when both
    // are stored does the off-diagonal product term contribute; otherwise at
    // least one off-diagonal is an (unstored) zero and det = a00 * a11.
    if (ija[2] - ija[0] == 2) {
      *result = a[0] * a[1] - a[col_pos] * a[col_pos+1];
    }
    else { *result = a[0] * a[1]; }
  } else if (M == 3) {
    // Expand the sparse rows into a dense 3x3 scratch matrix m, then apply
    // the rule of Sarrus.
    DType m[3][3];
    for (int i = 0; i < 3; ++i) {
      m[i][i] = a[i];
      switch(ija[i+1] - ija[i]) { // number of stored off-diagonals in row i
        case 2:
          // Both off-diagonals stored; ija names their columns.
          m[i][ija[col_pos]] = a[col_pos];
          m[i][ija[col_pos+1]] = a[col_pos+1];
          col_pos += 2;
          break;
        case 1:
          // Zero both off-diagonals first, then fill the single stored one.
          m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
          m[i][ija[col_pos]] = a[col_pos];
          ++col_pos;
          break;
        case 0:
          m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
          break;
        default:
          rb_raise(rb_eArgError, "some value in IJA is incorrect!");
      }
    }
    // Rule of Sarrus on the densified 3x3.
    *result =
      m[0][0] * m[1][1] * m[2][2] + m[0][1] * m[1][2] * m[2][0] + m[0][2] * m[1][0] * m[2][1]
      - m[0][0] * m[1][2] * m[2][1] - m[0][1] * m[1][0] * m[2][2] - m[0][2] * m[1][1] * m[2][0];

  } else if (M < 2) {
    rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
  } else {
    rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
  }
}
|
|
271
|
+
|
|
272
|
+
/*
|
|
273
|
+
* Solve a system of linear equations using forward-substitution followed by
|
|
274
|
+
* back substitution from the LU factorization of the matrix of coefficients.
|
|
275
|
+
* Replaces x_elements with the result. Works only with non-integer, non-object
|
|
276
|
+
* data types.
|
|
277
|
+
*
|
|
278
|
+
* args - r -> The number of rows of the matrix.
|
|
279
|
+
* lu_elements -> Elements of the LU decomposition of the co-efficients
|
|
280
|
+
* matrix, as a contiguous array.
|
|
281
|
+
* b_elements -> Elements of the right-hand sides, as a contiguous array.
|
|
282
|
+
* x_elements -> The array that will contain the results of the computation.
|
|
283
|
+
* pivot -> Positions of permuted rows.
|
|
284
|
+
*/
|
|
285
|
+
template <typename DType>
void solve(const int r, const void* lu_elements, const void* b_elements, void* x_elements, const int* pivot) {
  // Solves A x = b given the LU factorization of A (both factors packed into
  // one r-x-r row-major array) and the row-pivot permutation produced by the
  // factorization. Forward substitution with L, then back substitution with U.
  //   r           - number of rows/columns
  //   lu_elements - packed LU factors, row-major, contiguous
  //   b_elements  - right-hand side, length r
  //   x_elements  - output buffer, length r (overwritten with the solution)
  //   pivot       - row permutation applied during factorization
  // ii tracks the first nonzero index encountered in the permuted RHS so the
  // forward loop can skip leading zeros (0 means "none seen yet").
  int ii = 0, ip;
  DType sum;

  const DType* matrix = reinterpret_cast<const DType*>(lu_elements);
  const DType* b = reinterpret_cast<const DType*>(b_elements);
  DType* x = reinterpret_cast<DType*>(x_elements);

  // Work on a copy of b so b_elements itself is not modified.
  for (int i = 0; i < r; ++i) { x[i] = b[i]; }
  for (int i = 0; i < r; ++i) { // forward substitution loop
    // Un-permute the RHS on the fly while accumulating L's contribution.
    ip = pivot[i];
    sum = x[ip];
    x[ip] = x[i];

    if (ii != 0) {
      for (int j = ii - 1;j < i; ++j) { sum = sum - matrix[i * r + j] * x[j]; }
    }
    else if (sum != 0.0) {
      // First nonzero element of the permuted RHS; start accumulating here.
      ii = i + 1;
    }
    x[i] = sum;
  }

  for (int i = r - 1; i >= 0; --i) { // back substitution loop
    sum = x[i];
    for (int j = i + 1; j < r; j++) { sum = sum - matrix[i * r + j] * x[j]; }
    // Divide by U's diagonal (L's diagonal is implicitly 1).
    x[i] = sum/matrix[i * r + i];
  }
}
|
|
315
|
+
|
|
316
|
+
/*
|
|
317
|
+
* Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
|
|
318
|
+
* In-place inversion of the matrix saves on memory and time.
|
|
319
|
+
*
|
|
320
|
+
* args - M - Shape of the matrix
|
|
321
|
+
* a_elements - A duplicate of the original expressed as a contiguous array
|
|
322
|
+
*/
|
|
323
|
+
template <typename DType>
void inverse(const int M, void* a_elements) {
  // In-place inverse of an M-x-M row-major matrix via Gauss-Jordan
  // elimination with partial (row) pivoting. a_elements is overwritten with
  // the inverse. Raises ZeroDivError if a zero pivot is encountered
  // (i.e. the matrix appears singular under this pivoting).
  DType* matrix = reinterpret_cast<DType*>(a_elements);
  // NOTE(review): these are C99-style variable-length arrays, a compiler
  // extension in C++ — works under gcc/clang but is not standard.
  int row_index[M]; // arrays for keeping track of column scrambling
  int col_index[M];

  for (int k = 0;k < M; ++k) {
    // Find the largest-magnitude element in column k, on or below the
    // diagonal, to use as the pivot.
    typename MagnitudeDType<DType>::type akk;
    akk = magnitude( matrix[k * (M + 1)] ); // diagonal element

    int interchange = k;

    for (int row = k + 1; row < M; ++row) {
      typename MagnitudeDType<DType>::type big;
      big = magnitude( matrix[M*row + k] ); // element below the temp pivot

      if ( big > akk ) {
        interchange = row;
        akk = big;
      }
    }

    if (interchange != k) { // check if rows need flipping
      DType temp;

      for (int col = 0; col < M; ++col) {
        NM_SWAP(matrix[interchange*M + col], matrix[k*M + col], temp);
      }
    }

    // Record the permutation so columns can be unscrambled at the end.
    row_index[k] = interchange;
    col_index[k] = k;

    if (matrix[k * (M + 1)] == (DType)(0)) {
      rb_raise(rb_eZeroDivError, "Expected Non-Singular Matrix.");
    }

    // Replace the pivot with 1 before scaling: this is what makes the
    // inversion happen "in place" (the identity is built into the same array).
    DType pivot = matrix[k * (M + 1)];
    matrix[k * (M + 1)] = (DType)(1); // set diagonal as 1 for in-place inversion

    for (int col = 0; col < M; ++col) {
      // divide each element in the kth row with the pivot
      matrix[k*M + col] = matrix[k*M + col] / pivot;
    }

    for (int kk = 0; kk < M; ++kk) { // iterate and reduce all rows
      if (kk == k) continue;

      // Zero the k-th column entry of row kk before the row operation,
      // again so the implicit identity columns stay consistent.
      DType dum = matrix[k + M*kk];
      matrix[k + M*kk] = (DType)(0); // prepare for inplace inversion
      for (int col = 0; col < M; ++col) {
        matrix[M*kk + col] = matrix[M*kk + col] - matrix[M*k + col] * dum;
      }
    }
  }

  // Unscramble columns: undo the row swaps as column swaps, in reverse order.
  DType temp;

  for (int k = M - 1; k >= 0; --k) {
    if (row_index[k] != col_index[k]) {

      for (int row = 0; row < M; ++row) {
        NM_SWAP(matrix[row * M + row_index[k]], matrix[row * M + col_index[k]],
                temp);
      }
    }
  }
}
|
|
392
|
+
|
|
393
|
+
/*
|
|
394
|
+
* Reduce a square matrix to hessenberg form with householder transforms
|
|
395
|
+
*
|
|
396
|
+
* == Arguments
|
|
397
|
+
*
|
|
398
|
+
* nrows - The number of rows present in matrix a.
|
|
399
|
+
* a_elements - Elements of the matrix to be reduced in 1D array form.
|
|
400
|
+
*
|
|
401
|
+
* == References
|
|
402
|
+
*
|
|
403
|
+
* http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
|
|
404
|
+
* This code has been included by permission of the author.
|
|
405
|
+
*/
|
|
406
|
+
template <typename DType>
void hessenberg(const int nrows, void* a_elements) {
  // Reduces an nrows-x-nrows row-major matrix to upper Hessenberg form,
  // in place, using a sequence of Householder reflections Q applied as
  // Q * A * Q for each column (adapted, with permission, from
  // http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c).
  DType* a = reinterpret_cast<DType*>(a_elements);
  DType* u = new DType[nrows]; // auxiliary storage for the chosen vector
  DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
  int i, k, col;

  // For each column use a Householder transformation to zero all entries
  // below the subdiagonal. psubdiag walks down the subdiagonal (stride
  // nrows + 1 in the flat array).
  for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
       col++) {
    // Calculate the signed square root of the sum of squares of the
    // elements below the diagonal.

    for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
         p_a += nrows, i++) {
      sum_of_squares += *p_a * *p_a;
    }
    // Column already has zeros below the subdiagonal: nothing to reflect.
    if (sum_of_squares == 0.0) { continue; }
    sum_of_squares = std::sqrt(sum_of_squares);

    // Choose the sign opposite the subdiagonal entry to avoid cancellation.
    if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }

    // Calculate the Householder transformation Q = I - 2uu'/u'u.
    u[col + 1] = *psubdiag - sum_of_squares;
    *psubdiag = sum_of_squares;

    // Copy the rest of the column into u, zeroing it in A as we go.
    for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
      u[i] = *p_a;
      *p_a = 0.0;
    }

    // Premultiply A by Q
    scale = -1.0 / (sum_of_squares * u[col+1]);
    for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
      p_a = a + nrows * (col + 1) + i;
      for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
        innerproduct += u[k] * *p_a;
      }
      innerproduct *= scale;
      for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
        *p_a -= u[k] * innerproduct;
      }
    }

    // Postmultiply QA by Q
    for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
      for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
        innerproduct += u[k] * *(p_row + k);
      }
      innerproduct *= scale;

      for (k = col + 1; k < nrows; k++) {
        *(p_row + k) -= u[k] * innerproduct;
      }
    }
  }

  delete[] u;
}
|
|
466
|
+
|
|
467
|
+
// Raises NMatrix's NotInvertibleError. Shared by the exact-inverse routines
// below so they report singular matrices uniformly.
void raise_not_invertible_error() {
  rb_raise(nm_eNotInvertibleError,
      "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
}
|
|
471
|
+
|
|
472
|
+
/*
|
|
473
|
+
* Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
|
|
474
|
+
*/
|
|
475
|
+
template <typename DType>
|
|
476
|
+
void inverse_exact_from_dense(const int M, const void* A_elements,
|
|
477
|
+
const int lda, void* B_elements, const int ldb) {
|
|
478
|
+
|
|
479
|
+
const DType* A = reinterpret_cast<const DType*>(A_elements);
|
|
480
|
+
DType* B = reinterpret_cast<DType*>(B_elements);
|
|
481
|
+
|
|
482
|
+
if (M == 2) {
|
|
483
|
+
DType det = A[0] * A[lda+1] - A[1] * A[lda];
|
|
484
|
+
if (det == 0) { raise_not_invertible_error(); }
|
|
485
|
+
B[0] = A[lda+1] / det;
|
|
486
|
+
B[1] = -A[1] / det;
|
|
487
|
+
B[ldb] = -A[lda] / det;
|
|
488
|
+
B[ldb+1] = A[0] / det;
|
|
489
|
+
|
|
490
|
+
} else if (M == 3) {
|
|
491
|
+
// Calculate the exact determinant.
|
|
492
|
+
DType det;
|
|
493
|
+
det_exact_from_dense<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
|
|
494
|
+
if (det == 0) { raise_not_invertible_error(); }
|
|
495
|
+
|
|
496
|
+
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
|
|
497
|
+
B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
|
|
498
|
+
B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
|
|
499
|
+
B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
|
|
500
|
+
B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
|
|
501
|
+
B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
|
|
502
|
+
B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
|
|
503
|
+
B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
|
|
504
|
+
B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
|
|
505
|
+
} else if (M == 1) {
|
|
506
|
+
B[0] = 1 / A[0];
|
|
507
|
+
} else {
|
|
508
|
+
rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
template <typename DType>
void inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
                             const int lda, YALE_STORAGE* inverse, const int ldb) {
  // Computes the exact inverse of a 1x1, 2x2 or 3x3 Yale (sparse) matrix into
  // `inverse`, which the caller must have initialized as a clone of `storage`.
  // The 3x3 path densifies into scratch arrays, inverts via the adjugate
  // formula, then rebuilds inverse's a/ija arrays with the new sparsity.
  // NOTE(review): the 3x3 scratch loop writes A with row stride 3 but the
  // determinant/adjugate reads use stride lda — correct only when lda == 3;
  // confirm callers always pass lda == 3 here.
  // NOTE(review): the `new DType[...]` scratch buffers leak if rb_raise /
  // raise_not_invertible_error longjmps out before the delete[] calls.

  // inverse is a clone of storage
  const DType* a = reinterpret_cast<const DType*>(storage->a);
  const IType* ija = reinterpret_cast<const IType *>(storage->ija);
  DType* b = reinterpret_cast<DType*>(inverse->a);
  IType* ijb = reinterpret_cast<IType *>(inverse->ija);
  // First off-diagonal slot in the Yale a/ija arrays.
  IType col_pos = storage->shape[0] + 1;
  // Calculate the exact determinant.
  DType det;

  if (M == 2) {
    // Number of stored off-diagonal entries (0, 1, or 2).
    IType ndnz = ija[2] - ija[0];
    if (ndnz == 2) {
      det = a[0] * a[1] - a[col_pos] * a[col_pos+1];
    }
    else { det = a[0] * a[1]; }
    if (det == 0) { raise_not_invertible_error(); }
    // inv([[a,b],[c,d]]) = 1/det * [[d,-b],[-c,a]]; the clone already has
    // the right sparsity pattern, so only values are rewritten.
    b[0] = a[1] / det;
    b[1] = a[0] / det;
    if (ndnz == 2) {
      b[col_pos] = -a[col_pos] / det;
      b[col_pos+1] = -a[col_pos+1] / det;
    }
    else if (ndnz == 1) {
      b[col_pos] = -a[col_pos] / det;
    }

  } else if (M == 3) {
    // Densify the sparse rows into scratch matrix A (row stride 3).
    DType *A = new DType[lda*3];
    for (int i = 0; i < lda; ++i) {
      A[i*3+i] = a[i];
      switch (ija[i+1] - ija[i]) { // stored off-diagonals in row i
        case 2:
          A[i*3 + ija[col_pos]] = a[col_pos];
          A[i*3 + ija[col_pos+1]] = a[col_pos+1];
          col_pos += 2;
          break;
        case 1:
          A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
          A[i*3 + ija[col_pos]] = a[col_pos];
          col_pos += 1;
          break;
        case 0:
          A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
          break;
        default:
          rb_raise(rb_eArgError, "some value in IJA is incorrect!");
      }
    }
    // Rule of Sarrus on the densified matrix.
    det =
      A[0] * A[lda+1] * A[2*lda+2] + A[1] * A[lda+2] * A[2*lda] + A[2] * A[lda] * A[2*lda+1]
      - A[0] * A[lda+2] * A[2*lda+1] - A[1] * A[lda] * A[2*lda+2] - A[2] * A[lda+1] * A[2*lda];
    if (det == 0) { raise_not_invertible_error(); }

    // Dense inverse via the adjugate formula.
    DType *B = new DType[3*ldb];
    B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
    B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
    B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
    B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
    B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
    B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
    B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
    B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
    B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd

    // Calculate the size of ijb and b, then reallocate them.
    IType ndnz = 0;
    for (int i = 0; i < 3; ++i) {
      for (int j = 0; j < 3; ++j) {
        if (j != i && B[i*ldb + j] != 0) { ++ndnz; }
      }
    }
    inverse->ndnz = ndnz;
    col_pos = 4; // shape[0] + 1
    inverse->capacity = 4 + ndnz;
    NM_REALLOC_N(inverse->a, DType, 4 + ndnz);
    NM_REALLOC_N(inverse->ija, IType, 4 + ndnz);
    // Re-fetch: realloc may have moved the buffers.
    b = reinterpret_cast<DType*>(inverse->a);
    ijb = reinterpret_cast<IType *>(inverse->ija);

    // Rebuild Yale arrays: diagonal in b[0..2], off-diagonals appended in
    // row order with their column indices in ijb.
    for (int i = 0; i < 3; ++i) {
      ijb[i] = col_pos; // row i's off-diagonals start here
      for (int j = 0; j < 3; ++j) {
        if (j == i) {
          b[i] = B[i*ldb + j];
        }
        else if (B[i*ldb + j] != 0) {
          b[col_pos] = B[i*ldb + j];
          ijb[col_pos] = j;
          ++col_pos;
        }
      }
    }
    b[3] = 0;          // unused slot between diagonal and off-diagonal values
    ijb[3] = col_pos;  // end-of-rows marker
    delete [] B;
    delete [] A;
  } else if (M == 1) {
    // NOTE(review): no singularity check here — a[0] == 0 divides by zero
    // instead of raising; the dense counterpart has the same gap.
    b[0] = 1 / a[0];
  } else {
    rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
  }
}
|
|
618
|
+
|
|
619
|
+
/*
|
|
620
|
+
* Function signature conversion for calling CBLAS' gemm functions as directly as possible.
|
|
621
|
+
*
|
|
622
|
+
* For documentation: http://www.netlib.org/blas/dgemm.f
|
|
623
|
+
*/
|
|
624
|
+
template <typename DType>
|
|
625
|
+
inline static void cblas_gemm(const enum CBLAS_ORDER order,
|
|
626
|
+
const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
|
|
627
|
+
int m, int n, int k,
|
|
628
|
+
void* alpha,
|
|
629
|
+
void* a, int lda,
|
|
630
|
+
void* b, int ldb,
|
|
631
|
+
void* beta,
|
|
632
|
+
void* c, int ldc)
|
|
633
|
+
{
|
|
634
|
+
gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
|
|
635
|
+
reinterpret_cast<DType*>(a), lda,
|
|
636
|
+
reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
|
|
637
|
+
reinterpret_cast<DType*>(c), ldc);
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
/*
|
|
642
|
+
* Function signature conversion for calling CBLAS's gemv functions as directly as possible.
|
|
643
|
+
*
|
|
644
|
+
* For documentation: http://www.netlib.org/lapack/double/dgetrf.f
|
|
645
|
+
*/
|
|
646
|
+
template <typename DType>
|
|
647
|
+
inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans,
|
|
648
|
+
const int m, const int n,
|
|
649
|
+
const void* alpha,
|
|
650
|
+
const void* a, const int lda,
|
|
651
|
+
const void* x, const int incx,
|
|
652
|
+
const void* beta,
|
|
653
|
+
void* y, const int incy)
|
|
654
|
+
{
|
|
655
|
+
return gemv<DType>(trans,
|
|
656
|
+
m, n, reinterpret_cast<const DType*>(alpha),
|
|
657
|
+
reinterpret_cast<const DType*>(a), lda,
|
|
658
|
+
reinterpret_cast<const DType*>(x), incx, reinterpret_cast<const DType*>(beta),
|
|
659
|
+
reinterpret_cast<DType*>(y), incy);
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
/*
|
|
664
|
+
* Function signature conversion for calling CBLAS' trsm functions as directly as possible.
|
|
665
|
+
*
|
|
666
|
+
* For documentation: http://www.netlib.org/blas/dtrsm.f
|
|
667
|
+
*/
|
|
668
|
+
template <typename DType>
|
|
669
|
+
inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
670
|
+
const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
|
|
671
|
+
const int m, const int n, const void* alpha, const void* a,
|
|
672
|
+
const int lda, void* b, const int ldb)
|
|
673
|
+
{
|
|
674
|
+
trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
|
|
675
|
+
reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
}
|
|
679
|
+
} // end of namespace nm::math
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
extern "C" {
|
|
683
|
+
|
|
684
|
+
///////////////////
|
|
685
|
+
// Ruby Bindings //
|
|
686
|
+
///////////////////
|
|
687
|
+
|
|
688
|
+
void nm_math_init_blas() {
|
|
689
|
+
VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");
|
|
690
|
+
|
|
691
|
+
rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);
|
|
692
|
+
|
|
693
|
+
VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");
|
|
694
|
+
|
|
695
|
+
/* ATLAS-CLAPACK Functions that are implemented internally */
|
|
696
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
|
697
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
|
698
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
|
|
699
|
+
|
|
700
|
+
VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");
|
|
701
|
+
|
|
702
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
|
|
703
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
|
|
704
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
|
|
705
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
|
|
706
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
|
|
707
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
|
|
708
|
+
|
|
709
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
|
|
710
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
|
|
711
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
/*
|
|
715
|
+
* call-seq:
|
|
716
|
+
* NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
|
|
717
|
+
*
|
|
718
|
+
* BLAS level 1 function +scal+. Works with all dtypes.
|
|
719
|
+
*
|
|
720
|
+
* Scale +vector+ in-place by +alpha+ and also return it. The operation is as
|
|
721
|
+
* follows:
|
|
722
|
+
* x <- alpha * x
|
|
723
|
+
*
|
|
724
|
+
* - +n+ -> Number of elements of +vector+.
|
|
725
|
+
* - +alpha+ -> Scalar value used in the operation.
|
|
726
|
+
* - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
|
|
727
|
+
* - +inc+ -> Increment used in the scaling function. Should generally be 1.
|
|
728
|
+
*/
|
|
729
|
+
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
|
|
730
|
+
nm::dtype_t dtype = NM_DTYPE(vector);
|
|
731
|
+
|
|
732
|
+
void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
733
|
+
rubyval_to_cval(alpha, dtype, scalar);
|
|
734
|
+
|
|
735
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_scal, void, const int n,
|
|
736
|
+
const void* scalar, void* x, const int incx);
|
|
737
|
+
|
|
738
|
+
ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
|
|
739
|
+
FIX2INT(incx));
|
|
740
|
+
|
|
741
|
+
return vector;
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
/*
|
|
745
|
+
* Call any of the cblas_xrotg functions as directly as possible.
|
|
746
|
+
*
|
|
747
|
+
* xROTG computes the elements of a Givens plane rotation matrix such that:
|
|
748
|
+
*
|
|
749
|
+
* | c s | | a | | r |
|
|
750
|
+
* | -s c | * | b | = | 0 |
|
|
751
|
+
*
|
|
752
|
+
* where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
|
|
753
|
+
*
|
|
754
|
+
* The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
|
|
755
|
+
*
|
|
756
|
+
* This function differs from most of the other raw BLAS accessors. Instead of
|
|
757
|
+
* providing a, b, c, s as arguments, you should only provide a and b (the
|
|
758
|
+
* inputs), and you should provide them as the first two elements of any dense
|
|
759
|
+
* NMatrix type.
|
|
760
|
+
*
|
|
761
|
+
* The outputs [c,s] will be returned in a Ruby Array at the end; the input
|
|
762
|
+
* NMatrix will also be modified in-place.
|
|
763
|
+
*
|
|
764
|
+
* This function, like the other cblas_ functions, does minimal type-checking.
|
|
765
|
+
*/
|
|
766
|
+
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
|
767
|
+
static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
|
|
768
|
+
NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
|
|
769
|
+
nm::math::cblas_rotg<float>,
|
|
770
|
+
nm::math::cblas_rotg<double>,
|
|
771
|
+
nm::math::cblas_rotg<nm::Complex64>,
|
|
772
|
+
nm::math::cblas_rotg<nm::Complex128>,
|
|
773
|
+
NULL //nm::math::cblas_rotg<nm::RubyObject>
|
|
774
|
+
};
|
|
775
|
+
|
|
776
|
+
nm::dtype_t dtype = NM_DTYPE(ab);
|
|
777
|
+
|
|
778
|
+
if (!ttable[dtype]) {
|
|
779
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
|
780
|
+
return Qnil;
|
|
781
|
+
|
|
782
|
+
} else {
|
|
783
|
+
NM_CONSERVATIVE(nm_register_value(&self));
|
|
784
|
+
NM_CONSERVATIVE(nm_register_value(&ab));
|
|
785
|
+
void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
|
786
|
+
*pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
787
|
+
|
|
788
|
+
// extract A and B from the NVector (first two elements)
|
|
789
|
+
void* pA = NM_STORAGE_DENSE(ab)->elements;
|
|
790
|
+
void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
|
|
791
|
+
// c and s are output
|
|
792
|
+
|
|
793
|
+
ttable[dtype](pA, pB, pC, pS);
|
|
794
|
+
|
|
795
|
+
VALUE result = rb_ary_new2(2);
|
|
796
|
+
|
|
797
|
+
if (dtype == nm::RUBYOBJ) {
|
|
798
|
+
rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
|
|
799
|
+
rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
|
|
800
|
+
} else {
|
|
801
|
+
rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
|
|
802
|
+
rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
|
|
803
|
+
}
|
|
804
|
+
NM_CONSERVATIVE(nm_unregister_value(&ab));
|
|
805
|
+
NM_CONSERVATIVE(nm_unregister_value(&self));
|
|
806
|
+
return result;
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
/*
|
|
812
|
+
* Call any of the cblas_xrot functions as directly as possible.
|
|
813
|
+
*
|
|
814
|
+
* xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
|
|
815
|
+
*
|
|
816
|
+
* It's tough to find documentation on xROT. Here are what we think the arguments are for:
|
|
817
|
+
* * n :: number of elements to consider in x and y
|
|
818
|
+
* * x :: a vector (expects an NVector)
|
|
819
|
+
* * incx :: stride of x
|
|
820
|
+
* * y :: a vector (expects an NVector)
|
|
821
|
+
* * incy :: stride of y
|
|
822
|
+
* * c :: cosine of the angle of rotation
|
|
823
|
+
* * s :: sine of the angle of rotation
|
|
824
|
+
*
|
|
825
|
+
* Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
|
|
826
|
+
* will be float for Complex64 or double for Complex128.
|
|
827
|
+
*
|
|
828
|
+
* You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
|
|
829
|
+
* with its arguments?
|
|
830
|
+
*
|
|
831
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
|
832
|
+
* handling, so you can easily crash Ruby!
|
|
833
|
+
*/
|
|
834
|
+
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
|
|
835
|
+
static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
|
|
836
|
+
NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
|
|
837
|
+
nm::math::cblas_rot<float,float>,
|
|
838
|
+
nm::math::cblas_rot<double,double>,
|
|
839
|
+
nm::math::cblas_rot<nm::Complex64,float>,
|
|
840
|
+
nm::math::cblas_rot<nm::Complex128,double>,
|
|
841
|
+
nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
|
|
842
|
+
};
|
|
843
|
+
|
|
844
|
+
nm::dtype_t dtype = NM_DTYPE(x);
|
|
845
|
+
|
|
846
|
+
|
|
847
|
+
if (!ttable[dtype]) {
|
|
848
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
|
849
|
+
return Qfalse;
|
|
850
|
+
} else {
|
|
851
|
+
void *pC, *pS;
|
|
852
|
+
|
|
853
|
+
// We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
|
|
854
|
+
if (dtype == nm::COMPLEX64) {
|
|
855
|
+
pC = NM_ALLOCA_N(float,1);
|
|
856
|
+
pS = NM_ALLOCA_N(float,1);
|
|
857
|
+
rubyval_to_cval(c, nm::FLOAT32, pC);
|
|
858
|
+
rubyval_to_cval(s, nm::FLOAT32, pS);
|
|
859
|
+
} else if (dtype == nm::COMPLEX128) {
|
|
860
|
+
pC = NM_ALLOCA_N(double,1);
|
|
861
|
+
pS = NM_ALLOCA_N(double,1);
|
|
862
|
+
rubyval_to_cval(c, nm::FLOAT64, pC);
|
|
863
|
+
rubyval_to_cval(s, nm::FLOAT64, pS);
|
|
864
|
+
} else {
|
|
865
|
+
pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
866
|
+
pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
867
|
+
rubyval_to_cval(c, dtype, pC);
|
|
868
|
+
rubyval_to_cval(s, dtype, pS);
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
|
|
873
|
+
|
|
874
|
+
return Qtrue;
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
/*
|
|
880
|
+
* Call any of the cblas_xnrm2 functions as directly as possible.
|
|
881
|
+
*
|
|
882
|
+
* xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
|
|
883
|
+
*
|
|
884
|
+
* Arguments:
|
|
885
|
+
* * n :: length of x, must be at least 0
|
|
886
|
+
* * x :: pointer to first entry of input vector
|
|
887
|
+
* * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
|
|
888
|
+
*
|
|
889
|
+
* You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
|
|
890
|
+
* with its arguments?
|
|
891
|
+
*
|
|
892
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
|
893
|
+
* handling, so you can easily crash Ruby!
|
|
894
|
+
*/
|
|
895
|
+
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
896
|
+
|
|
897
|
+
static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
|
|
898
|
+
NULL, NULL, NULL, NULL, NULL, // no help for integers
|
|
899
|
+
nm::math::cblas_nrm2<float32_t>,
|
|
900
|
+
nm::math::cblas_nrm2<float64_t>,
|
|
901
|
+
nm::math::cblas_nrm2<nm::Complex64>,
|
|
902
|
+
nm::math::cblas_nrm2<nm::Complex128>,
|
|
903
|
+
nm::math::cblas_nrm2<nm::RubyObject>
|
|
904
|
+
};
|
|
905
|
+
|
|
906
|
+
nm::dtype_t dtype = NM_DTYPE(x);
|
|
907
|
+
|
|
908
|
+
if (!ttable[dtype]) {
|
|
909
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
|
910
|
+
return Qnil;
|
|
911
|
+
|
|
912
|
+
} else {
|
|
913
|
+
// Determine the return dtype and allocate it
|
|
914
|
+
nm::dtype_t rdtype = dtype;
|
|
915
|
+
if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
|
|
916
|
+
else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
|
|
917
|
+
|
|
918
|
+
void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
|
|
919
|
+
|
|
920
|
+
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
|
|
921
|
+
|
|
922
|
+
return nm::rubyobj_from_cval(Result, rdtype).rval;
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
/*
|
|
929
|
+
* Call any of the cblas_xasum functions as directly as possible.
|
|
930
|
+
*
|
|
931
|
+
* xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
|
|
932
|
+
* of a vector x.
|
|
933
|
+
*
|
|
934
|
+
* Arguments:
|
|
935
|
+
* * n :: length of x, must be at least 0
|
|
936
|
+
* * x :: pointer to first entry of input vector
|
|
937
|
+
* * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
|
|
938
|
+
*
|
|
939
|
+
* You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
|
|
940
|
+
* with its arguments?
|
|
941
|
+
*
|
|
942
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
|
943
|
+
* handling, so you can easily crash Ruby!
|
|
944
|
+
*/
|
|
945
|
+
static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
946
|
+
|
|
947
|
+
static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
|
|
948
|
+
nm::math::cblas_asum<uint8_t>,
|
|
949
|
+
nm::math::cblas_asum<int8_t>,
|
|
950
|
+
nm::math::cblas_asum<int16_t>,
|
|
951
|
+
nm::math::cblas_asum<int32_t>,
|
|
952
|
+
nm::math::cblas_asum<int64_t>,
|
|
953
|
+
nm::math::cblas_asum<float32_t>,
|
|
954
|
+
nm::math::cblas_asum<float64_t>,
|
|
955
|
+
nm::math::cblas_asum<nm::Complex64>,
|
|
956
|
+
nm::math::cblas_asum<nm::Complex128>,
|
|
957
|
+
nm::math::cblas_asum<nm::RubyObject>
|
|
958
|
+
};
|
|
959
|
+
|
|
960
|
+
nm::dtype_t dtype = NM_DTYPE(x);
|
|
961
|
+
|
|
962
|
+
// Determine the return dtype and allocate it
|
|
963
|
+
nm::dtype_t rdtype = dtype;
|
|
964
|
+
if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
|
|
965
|
+
else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
|
|
966
|
+
|
|
967
|
+
void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
|
|
968
|
+
|
|
969
|
+
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
|
|
970
|
+
|
|
971
|
+
return nm::rubyobj_from_cval(Result, rdtype).rval;
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
/*
|
|
975
|
+
* call-seq:
|
|
976
|
+
* NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
|
|
977
|
+
*
|
|
978
|
+
* BLAS level 1 routine.
|
|
979
|
+
*
|
|
980
|
+
* Return the index of the largest element of +vector+.
|
|
981
|
+
*
|
|
982
|
+
* - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
|
|
983
|
+
* - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
|
|
984
|
+
* - +inc+ -> It's the increment used when searching. Use 1 except if you know
|
|
985
|
+
* what you're doing.
|
|
986
|
+
*/
|
|
987
|
+
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
988
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_imax, int, const int n, const void* x, const int incx);
|
|
989
|
+
|
|
990
|
+
nm::dtype_t dtype = NM_DTYPE(x);
|
|
991
|
+
|
|
992
|
+
int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));
|
|
993
|
+
|
|
994
|
+
// Convert to Ruby's Int value.
|
|
995
|
+
return INT2FIX(index);
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
/* Call any of the cblas_xgemm functions as directly as possible.
|
|
1000
|
+
*
|
|
1001
|
+
* The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
|
|
1002
|
+
*
|
|
1003
|
+
* C = alpha*op(A)*op(B) + beta*C
|
|
1004
|
+
*
|
|
1005
|
+
* where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
|
|
1006
|
+
*
|
|
1007
|
+
* Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
|
|
1008
|
+
* Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
|
|
1009
|
+
* expose the ultra-optimized ATLAS versions.
|
|
1010
|
+
*
|
|
1011
|
+
* == Arguments
|
|
1012
|
+
* See: http://www.netlib.org/blas/dgemm.f
|
|
1013
|
+
*
|
|
1014
|
+
* You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
|
|
1015
|
+
* with its arguments?
|
|
1016
|
+
*
|
|
1017
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
|
1018
|
+
* handling, so you can easily crash Ruby!
|
|
1019
|
+
*/
|
|
1020
|
+
static VALUE nm_cblas_gemm(VALUE self,
|
|
1021
|
+
VALUE order,
|
|
1022
|
+
VALUE trans_a, VALUE trans_b,
|
|
1023
|
+
VALUE m, VALUE n, VALUE k,
|
|
1024
|
+
VALUE alpha,
|
|
1025
|
+
VALUE a, VALUE lda,
|
|
1026
|
+
VALUE b, VALUE ldb,
|
|
1027
|
+
VALUE beta,
|
|
1028
|
+
VALUE c, VALUE ldc)
|
|
1029
|
+
{
|
|
1030
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
|
|
1031
|
+
|
|
1032
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
|
1033
|
+
|
|
1034
|
+
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
|
1035
|
+
*pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
1036
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
|
1037
|
+
rubyval_to_cval(beta, dtype, pBeta);
|
|
1038
|
+
|
|
1039
|
+
ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
|
1040
|
+
|
|
1041
|
+
return c;
|
|
1042
|
+
}
|
|
1043
|
+
|
|
1044
|
+
|
|
1045
|
+
/* Call any of the cblas_xgemv functions as directly as possible.
|
|
1046
|
+
*
|
|
1047
|
+
* The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
|
|
1048
|
+
*
|
|
1049
|
+
* y = alpha*op(A)*x + beta*y
|
|
1050
|
+
*
|
|
1051
|
+
* where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
|
|
1052
|
+
*
|
|
1053
|
+
* Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
|
|
1054
|
+
* Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
|
|
1055
|
+
* expose the ultra-optimized ATLAS versions.
|
|
1056
|
+
*
|
|
1057
|
+
* == Arguments
|
|
1058
|
+
 * See: http://www.netlib.org/blas/dgemv.f
|
|
1059
|
+
*
|
|
1060
|
+
* You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
|
|
1061
|
+
* with its arguments?
|
|
1062
|
+
*
|
|
1063
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
|
1064
|
+
* handling, so you can easily crash Ruby!
|
|
1065
|
+
*/
|
|
1066
|
+
static VALUE nm_cblas_gemv(VALUE self,
|
|
1067
|
+
VALUE trans_a,
|
|
1068
|
+
VALUE m, VALUE n,
|
|
1069
|
+
VALUE alpha,
|
|
1070
|
+
VALUE a, VALUE lda,
|
|
1071
|
+
VALUE x, VALUE incx,
|
|
1072
|
+
VALUE beta,
|
|
1073
|
+
VALUE y, VALUE incy)
|
|
1074
|
+
{
|
|
1075
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
|
|
1076
|
+
|
|
1077
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
|
1078
|
+
|
|
1079
|
+
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
|
1080
|
+
*pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
1081
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
|
1082
|
+
rubyval_to_cval(beta, dtype, pBeta);
|
|
1083
|
+
|
|
1084
|
+
return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
static VALUE nm_cblas_trsm(VALUE self,
|
|
1089
|
+
VALUE order,
|
|
1090
|
+
VALUE side, VALUE uplo,
|
|
1091
|
+
VALUE trans_a, VALUE diag,
|
|
1092
|
+
VALUE m, VALUE n,
|
|
1093
|
+
VALUE alpha,
|
|
1094
|
+
VALUE a, VALUE lda,
|
|
1095
|
+
VALUE b, VALUE ldb)
|
|
1096
|
+
{
|
|
1097
|
+
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
|
|
1098
|
+
const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
|
|
1099
|
+
const int m, const int n, const void* alpha, const void* a,
|
|
1100
|
+
const int lda, void* b, const int ldb) = {
|
|
1101
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
|
1102
|
+
nm::math::cblas_trsm<float>,
|
|
1103
|
+
nm::math::cblas_trsm<double>,
|
|
1104
|
+
nm::math::cblas_trsm<nm::Complex64>,
|
|
1105
|
+
nm::math::cblas_trsm<nm::Complex128>,
|
|
1106
|
+
nm::math::cblas_trsm<nm::RubyObject>
|
|
1107
|
+
};
|
|
1108
|
+
|
|
1109
|
+
nm::dtype_t dtype = NM_DTYPE(a);
|
|
1110
|
+
|
|
1111
|
+
if (!ttable[dtype]) {
|
|
1112
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
|
1113
|
+
} else {
|
|
1114
|
+
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
|
1115
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
|
1116
|
+
|
|
1117
|
+
ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
return Qtrue;
|
|
1121
|
+
}
|
|
1122
|
+
|
|
1123
|
+
/* Call any of the clapack_xgetrf functions as directly as possible.
|
|
1124
|
+
*
|
|
1125
|
+
* The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
|
|
1126
|
+
* matrix A using partial pivoting with row interchanges.
|
|
1127
|
+
*
|
|
1128
|
+
* The factorization has the form:
|
|
1129
|
+
* A = P * L * U
|
|
1130
|
+
* where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
|
|
1131
|
+
* and U is upper triangular (upper trapezoidal if m < n).
|
|
1132
|
+
*
|
|
1133
|
+
* This is the right-looking level 3 BLAS version of the algorithm.
|
|
1134
|
+
*
|
|
1135
|
+
* == Arguments
|
|
1136
|
+
* See: http://www.netlib.org/lapack/double/dgetrf.f
|
|
1137
|
+
* (You don't need argument 5; this is the value returned by this function.)
|
|
1138
|
+
*
|
|
1139
|
+
* You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
|
|
1140
|
+
* with its arguments?
|
|
1141
|
+
*
|
|
1142
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
|
1143
|
+
* handling, so you can easily crash Ruby!
|
|
1144
|
+
*
|
|
1145
|
+
* Returns an array giving the pivot indices (normally these are argument #5).
|
|
1146
|
+
*/
|
|
1147
|
+
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
|
|
1148
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
|
|
1149
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
|
1150
|
+
nm::math::clapack_getrf<float>,
|
|
1151
|
+
nm::math::clapack_getrf<double>,
|
|
1152
|
+
nm::math::clapack_getrf<nm::Complex64>,
|
|
1153
|
+
nm::math::clapack_getrf<nm::Complex128>,
|
|
1154
|
+
nm::math::clapack_getrf<nm::RubyObject>
|
|
1155
|
+
};
|
|
1156
|
+
|
|
1157
|
+
int M = FIX2INT(m),
|
|
1158
|
+
N = FIX2INT(n);
|
|
1159
|
+
|
|
1160
|
+
// Allocate the pivot index array, which is of size MIN(M, N).
|
|
1161
|
+
size_t ipiv_size = std::min(M,N);
|
|
1162
|
+
int* ipiv = NM_ALLOCA_N(int, ipiv_size);
|
|
1163
|
+
|
|
1164
|
+
if (!ttable[NM_DTYPE(a)]) {
|
|
1165
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
|
1166
|
+
} else {
|
|
1167
|
+
// Call either our version of getrf or the LAPACK version.
|
|
1168
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
// Result will be stored in a. We return ipiv as an array.
|
|
1172
|
+
VALUE ipiv_array = rb_ary_new2(ipiv_size);
|
|
1173
|
+
for (size_t i = 0; i < ipiv_size; ++i) {
|
|
1174
|
+
rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
return ipiv_array;
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
|
|
1181
|
+
/*
|
|
1182
|
+
* Call any of the clapack_xgetrs functions as directly as possible.
|
|
1183
|
+
*/
|
|
1184
|
+
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
|
|
1185
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
|
|
1186
|
+
const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
|
|
1187
|
+
const int ldb) = {
|
|
1188
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
|
1189
|
+
nm::math::clapack_getrs<float>,
|
|
1190
|
+
nm::math::clapack_getrs<double>,
|
|
1191
|
+
nm::math::clapack_getrs<nm::Complex64>,
|
|
1192
|
+
nm::math::clapack_getrs<nm::Complex128>,
|
|
1193
|
+
nm::math::clapack_getrs<nm::RubyObject>
|
|
1194
|
+
};
|
|
1195
|
+
|
|
1196
|
+
// Allocate the C version of the pivot index array
|
|
1197
|
+
int* ipiv_;
|
|
1198
|
+
if (!RB_TYPE_P(ipiv, T_ARRAY)) {
|
|
1199
|
+
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
|
1200
|
+
} else {
|
|
1201
|
+
ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
|
|
1202
|
+
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
|
1203
|
+
ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1207
|
+
if (!ttable[NM_DTYPE(a)]) {
|
|
1208
|
+
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
|
1209
|
+
} else {
|
|
1210
|
+
|
|
1211
|
+
// Call either our version of getrs or the LAPACK version.
|
|
1212
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
|
1213
|
+
ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1216
|
+
// b is both returned and modified directly in the argument list.
|
|
1217
|
+
return b;
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
/*
|
|
1221
|
+
* Simple way to check from within Ruby code if clapack functions are available, without
|
|
1222
|
+
* having to wait around for an exception to be thrown.
|
|
1223
|
+
*/
|
|
1224
|
+
static VALUE nm_has_clapack(VALUE self) {
|
|
1225
|
+
return Qfalse;
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
/*
|
|
1229
|
+
* Call any of the clapack_xlaswp functions as directly as possible.
|
|
1230
|
+
*
|
|
1231
|
+
* Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
|
|
1232
|
+
* number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
|
|
1233
|
+
*/
|
|
1234
|
+
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
|
|
1235
|
+
static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
|
|
1236
|
+
nm::math::clapack_laswp<uint8_t>,
|
|
1237
|
+
nm::math::clapack_laswp<int8_t>,
|
|
1238
|
+
nm::math::clapack_laswp<int16_t>,
|
|
1239
|
+
nm::math::clapack_laswp<int32_t>,
|
|
1240
|
+
nm::math::clapack_laswp<int64_t>,
|
|
1241
|
+
nm::math::clapack_laswp<float>,
|
|
1242
|
+
nm::math::clapack_laswp<double>,
|
|
1243
|
+
nm::math::clapack_laswp<nm::Complex64>,
|
|
1244
|
+
nm::math::clapack_laswp<nm::Complex128>,
|
|
1245
|
+
nm::math::clapack_laswp<nm::RubyObject>
|
|
1246
|
+
};
|
|
1247
|
+
|
|
1248
|
+
// Allocate the C version of the pivot index array
|
|
1249
|
+
int* ipiv_;
|
|
1250
|
+
if (!RB_TYPE_P(ipiv, T_ARRAY)) {
|
|
1251
|
+
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
|
1252
|
+
} else {
|
|
1253
|
+
ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
|
|
1254
|
+
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
|
1255
|
+
ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1259
|
+
// Call either our version of laswp or the LAPACK version.
|
|
1260
|
+
ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
|
|
1261
|
+
|
|
1262
|
+
// a is both returned and modified directly in the argument list.
|
|
1263
|
+
return a;
|
|
1264
|
+
}
|
|
1265
|
+
|
|
1266
|
+
|
|
1267
|
+
/*
|
|
1268
|
+
* C accessor for calculating an exact determinant. Dense matrix version.
|
|
1269
|
+
*/
|
|
1270
|
+
void nm_math_det_exact_from_dense(const int M, const void* elements, const int lda,
|
|
1271
|
+
nm::dtype_t dtype, void* result) {
|
|
1272
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_dense, void, const int M,
|
|
1273
|
+
const void* A_elements, const int lda, void* result_arg);
|
|
1274
|
+
|
|
1275
|
+
ttable[dtype](M, elements, lda, result);
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
/*
|
|
1279
|
+
* C accessor for calculating an exact determinant. Yale matrix version.
|
|
1280
|
+
*/
|
|
1281
|
+
void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda,
|
|
1282
|
+
nm::dtype_t dtype, void* result) {
|
|
1283
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_yale, void, const int M,
|
|
1284
|
+
const YALE_STORAGE* storage, const int lda, void* result_arg);
|
|
1285
|
+
|
|
1286
|
+
ttable[dtype](M, storage, lda, result);
|
|
1287
|
+
}
|
|
1288
|
+
|
|
1289
|
+
/*
|
|
1290
|
+
* C accessor for solving a system of linear equations.
|
|
1291
|
+
*/
|
|
1292
|
+
void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv) {
|
|
1293
|
+
int* pivot = new int[RARRAY_LEN(ipiv)];
|
|
1294
|
+
|
|
1295
|
+
for (int i = 0; i < RARRAY_LEN(ipiv); ++i) {
|
|
1296
|
+
pivot[i] = FIX2INT(rb_ary_entry(ipiv, i));
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::solve, void, const int, const void*, const void*, void*, const int*);
|
|
1300
|
+
|
|
1301
|
+
ttable[NM_DTYPE(x)](NM_SHAPE0(b), NM_STORAGE_DENSE(lu)->elements,
|
|
1302
|
+
NM_STORAGE_DENSE(b)->elements, NM_STORAGE_DENSE(x)->elements, pivot);
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
/*
|
|
1306
|
+
* C accessor for reducing a matrix to hessenberg form.
|
|
1307
|
+
*/
|
|
1308
|
+
void nm_math_hessenberg(VALUE a) {
|
|
1309
|
+
static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
|
|
1310
|
+
NULL, NULL, NULL, NULL, NULL, // does not support ints
|
|
1311
|
+
nm::math::hessenberg<float>,
|
|
1312
|
+
nm::math::hessenberg<double>,
|
|
1313
|
+
NULL, NULL, // does not support Complex
|
|
1314
|
+
NULL // no support for Ruby Object
|
|
1315
|
+
};
|
|
1316
|
+
|
|
1317
|
+
ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
|
|
1318
|
+
}
|
|
1319
|
+
/*
|
|
1320
|
+
* C accessor for calculating an in-place inverse.
|
|
1321
|
+
*/
|
|
1322
|
+
void nm_math_inverse(const int M, void* a_elements, nm::dtype_t dtype) {
|
|
1323
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse, void, const int, void*);
|
|
1324
|
+
|
|
1325
|
+
ttable[dtype](M, a_elements);
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
/*
|
|
1329
|
+
* C accessor for calculating an exact inverse. Dense matrix version.
|
|
1330
|
+
*/
|
|
1331
|
+
void nm_math_inverse_exact_from_dense(const int M, const void* A_elements,
|
|
1332
|
+
const int lda, void* B_elements, const int ldb, nm::dtype_t dtype) {
|
|
1333
|
+
|
|
1334
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_dense, void,
|
|
1335
|
+
const int, const void*, const int, void*, const int);
|
|
1336
|
+
|
|
1337
|
+
ttable[dtype](M, A_elements, lda, B_elements, ldb);
|
|
1338
|
+
}
|
|
1339
|
+
|
|
1340
|
+
/*
|
|
1341
|
+
* C accessor for calculating an exact inverse. Yale matrix version.
|
|
1342
|
+
*/
|
|
1343
|
+
void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
|
|
1344
|
+
const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype) {
|
|
1345
|
+
|
|
1346
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_yale, void,
|
|
1347
|
+
const int, const YALE_STORAGE*, const int, YALE_STORAGE*, const int);
|
|
1348
|
+
|
|
1349
|
+
ttable[dtype](M, storage, lda, inverse, ldb);
|
|
1350
|
+
}
|
|
1351
|
+
|
|
1352
|
+
/*
 * Transpose an array of elements that represent a row-major dense matrix. Does not allocate anything, only does a memcpy.
 *
 * A is M x N with leading dimension lda; B receives the N x M transpose with
 * leading dimension ldb. element_size is the byte width of one element, so
 * this works for any dtype.
 */
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
  const char* src = reinterpret_cast<const char*>(A);
  char*       dst = reinterpret_cast<char*>(B);

  // Walk the output (N x M) row by row, pulling each element from the
  // corresponding column of the input.
  for (size_t col = 0; col < N; ++col) {
    for (size_t row = 0; row < M; ++row) {
      memcpy(dst + (col * ldb + row) * element_size,
             src + (row * lda + col) * element_size,
             element_size);
    }
  }
}
|
|
1366
|
+
|
|
1367
|
+
|
|
1368
|
+
} // end of extern "C" block
|