pnmatrix 1.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/nmatrix/binary_format.txt +53 -0
- data/ext/nmatrix/data/complex.h +388 -0
- data/ext/nmatrix/data/data.cpp +274 -0
- data/ext/nmatrix/data/data.h +651 -0
- data/ext/nmatrix/data/meta.h +64 -0
- data/ext/nmatrix/data/ruby_object.h +386 -0
- data/ext/nmatrix/extconf.rb +70 -0
- data/ext/nmatrix/math/asum.h +99 -0
- data/ext/nmatrix/math/cblas_enums.h +36 -0
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +241 -0
- data/ext/nmatrix/math/gemv.h +178 -0
- data/ext/nmatrix/math/getrf.h +255 -0
- data/ext/nmatrix/math/getrs.h +121 -0
- data/ext/nmatrix/math/imax.h +82 -0
- data/ext/nmatrix/math/laswp.h +165 -0
- data/ext/nmatrix/math/long_dtype.h +62 -0
- data/ext/nmatrix/math/magnitude.h +54 -0
- data/ext/nmatrix/math/math.h +751 -0
- data/ext/nmatrix/math/nrm2.h +165 -0
- data/ext/nmatrix/math/rot.h +117 -0
- data/ext/nmatrix/math/rotg.h +106 -0
- data/ext/nmatrix/math/scal.h +71 -0
- data/ext/nmatrix/math/trsm.h +336 -0
- data/ext/nmatrix/math/util.h +162 -0
- data/ext/nmatrix/math.cpp +1368 -0
- data/ext/nmatrix/nm_memory.h +60 -0
- data/ext/nmatrix/nmatrix.cpp +285 -0
- data/ext/nmatrix/nmatrix.h +476 -0
- data/ext/nmatrix/ruby_constants.cpp +151 -0
- data/ext/nmatrix/ruby_constants.h +106 -0
- data/ext/nmatrix/ruby_nmatrix.c +3130 -0
- data/ext/nmatrix/storage/common.cpp +77 -0
- data/ext/nmatrix/storage/common.h +183 -0
- data/ext/nmatrix/storage/dense/dense.cpp +1096 -0
- data/ext/nmatrix/storage/dense/dense.h +129 -0
- data/ext/nmatrix/storage/list/list.cpp +1628 -0
- data/ext/nmatrix/storage/list/list.h +138 -0
- data/ext/nmatrix/storage/storage.cpp +730 -0
- data/ext/nmatrix/storage/storage.h +99 -0
- data/ext/nmatrix/storage/yale/class.h +1139 -0
- data/ext/nmatrix/storage/yale/iterators/base.h +143 -0
- data/ext/nmatrix/storage/yale/iterators/iterator.h +131 -0
- data/ext/nmatrix/storage/yale/iterators/row.h +450 -0
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +140 -0
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +169 -0
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +124 -0
- data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
- data/ext/nmatrix/storage/yale/yale.cpp +2074 -0
- data/ext/nmatrix/storage/yale/yale.h +203 -0
- data/ext/nmatrix/types.h +55 -0
- data/ext/nmatrix/util/io.cpp +279 -0
- data/ext/nmatrix/util/io.h +115 -0
- data/ext/nmatrix/util/sl_list.cpp +627 -0
- data/ext/nmatrix/util/sl_list.h +144 -0
- data/ext/nmatrix/util/util.h +78 -0
- data/lib/nmatrix/blas.rb +378 -0
- data/lib/nmatrix/cruby/math.rb +744 -0
- data/lib/nmatrix/enumerate.rb +253 -0
- data/lib/nmatrix/homogeneous.rb +241 -0
- data/lib/nmatrix/io/fortran_format.rb +138 -0
- data/lib/nmatrix/io/harwell_boeing.rb +221 -0
- data/lib/nmatrix/io/market.rb +263 -0
- data/lib/nmatrix/io/point_cloud.rb +189 -0
- data/lib/nmatrix/jruby/decomposition.rb +24 -0
- data/lib/nmatrix/jruby/enumerable.rb +13 -0
- data/lib/nmatrix/jruby/error.rb +4 -0
- data/lib/nmatrix/jruby/math.rb +501 -0
- data/lib/nmatrix/jruby/nmatrix_java.rb +840 -0
- data/lib/nmatrix/jruby/operators.rb +283 -0
- data/lib/nmatrix/jruby/slice.rb +264 -0
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +953 -0
- data/lib/nmatrix/mkmf.rb +100 -0
- data/lib/nmatrix/monkeys.rb +137 -0
- data/lib/nmatrix/nmatrix.rb +1172 -0
- data/lib/nmatrix/rspec.rb +75 -0
- data/lib/nmatrix/shortcuts.rb +1163 -0
- data/lib/nmatrix/version.rb +39 -0
- data/lib/nmatrix/yale_functions.rb +118 -0
- data/lib/nmatrix.rb +28 -0
- data/spec/00_nmatrix_spec.rb +892 -0
- data/spec/01_enum_spec.rb +196 -0
- data/spec/02_slice_spec.rb +407 -0
- data/spec/03_nmatrix_monkeys_spec.rb +80 -0
- data/spec/2x2_dense_double.mat +0 -0
- data/spec/4x4_sparse.mat +0 -0
- data/spec/4x5_dense.mat +0 -0
- data/spec/blas_spec.rb +215 -0
- data/spec/elementwise_spec.rb +311 -0
- data/spec/homogeneous_spec.rb +100 -0
- data/spec/io/fortran_format_spec.rb +88 -0
- data/spec/io/harwell_boeing_spec.rb +98 -0
- data/spec/io/test.rua +9 -0
- data/spec/io_spec.rb +159 -0
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/leakcheck.rb +16 -0
- data/spec/math_spec.rb +1363 -0
- data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
- data/spec/nmatrix_yale_spec.rb +286 -0
- data/spec/rspec_monkeys.rb +56 -0
- data/spec/rspec_spec.rb +35 -0
- data/spec/shortcuts_spec.rb +474 -0
- data/spec/slice_set_spec.rb +162 -0
- data/spec/spec_helper.rb +172 -0
- data/spec/stat_spec.rb +214 -0
- data/spec/test.pcd +20 -0
- data/spec/utm5940.mtx +83844 -0
- metadata +295 -0
@@ -0,0 +1,1368 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == math.cpp
|
25
|
+
//
|
26
|
+
// Ruby-exposed CBLAS and LAPACK functions that are available without
|
27
|
+
// an external library.
|
28
|
+
//
|
29
|
+
// === Procedure for adding CBLAS functions to math.cpp/math.h:
|
30
|
+
//
|
31
|
+
// This procedure is written as if for a fictional function with double
|
32
|
+
// version dbacon, which we'll say is from CBLAS.
|
33
|
+
//
|
34
|
+
// 1. Write a default templated version which probably returns a boolean.
|
35
|
+
// Call it bacon, and put it in math.h.
|
36
|
+
//
|
37
|
+
// template <typename DType>
|
38
|
+
// bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
39
|
+
// rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
|
40
|
+
// }
|
41
|
+
//
|
42
|
+
// Make sure this is in namespace nm::math
|
43
|
+
//
|
44
|
+
// 2. In math.cpp, add a templated inline static version of the function which takes
|
45
|
+
// only void* pointers and uses static_cast to convert them to the
|
46
|
+
// proper dtype. This should also be in namespace nm::math
|
47
|
+
//
|
48
|
+
// This function may also need to switch m and n if these arguments are given.
|
49
|
+
//
|
50
|
+
// For an example, see cblas_gemm. This function should do nothing other than cast
|
51
|
+
// appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
|
52
|
+
// all take void* only, and no other pointers that vary between functions, you can skip
|
53
|
+
// this particular step -- as we can call them directly using a custom function pointer
|
54
|
+
// array (same function signature!).
|
55
|
+
//
|
56
|
+
// This version of the function will be the one exposed through NMatrix::BLAS. We
|
57
|
+
// want it to be as close to the actual BLAS version of the function as possible,
|
58
|
+
// and with as few checks as possible.
|
59
|
+
//
|
60
|
+
// You will probably need a forward declaration in the extern "C" block.
|
61
|
+
//
|
62
|
+
// Note: In that case, the function you wrote in Step 1 should also take exactly the
|
63
|
+
// same arguments as cblas_xbacon. Otherwise Bad Things will happen.
|
64
|
+
//
|
65
|
+
// 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
|
66
|
+
// call nm::math::bacon()) and also
|
67
|
+
// inline specialized versions of bacon for the different BLAS types.
|
68
|
+
// This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
|
69
|
+
// of bacon from whatever external library is available, as well as the internal version
|
70
|
+
// if an external version is not available. These functions will end up in a namespace
|
71
|
+
// like nm::math::atlas, but don't explicitly put them in a namespace, they will get
|
72
|
+
// put in the appropriate namespace when cblas_templates_core.h is included.
|
73
|
+
//
|
74
|
+
// template <typename DType>
|
75
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
76
|
+
// nm::math::bacon(trans, M, N, A, ...);
|
77
|
+
// }
|
78
|
+
//
|
79
|
+
// template <>
|
80
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
|
81
|
+
// cblas_sbacon(trans, M, N, A, ...);
|
82
|
+
// return true;
|
83
|
+
// }
|
84
|
+
//
|
85
|
+
// Note that you should do everything in your power here to parse any return values
|
86
|
+
// cblas_sbacon may give you. We're not trying very hard in this example, but you might
|
87
|
+
// look at getrf to see how it might be done.
|
88
|
+
//
|
89
|
+
// 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
|
90
|
+
// of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
|
91
|
+
// and math_lapacke.cpp
|
92
|
+
//
|
93
|
+
// 5. Expose the function in nm_math_init_blas(), in math.cpp:
|
94
|
+
//
|
95
|
+
// rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
|
96
|
+
//
|
97
|
+
// Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
|
98
|
+
// to the plugin gems.
|
99
|
+
//
|
100
|
+
// Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
|
101
|
+
//
|
102
|
+
// 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
|
103
|
+
// sanity checks we left out in step 2.
|
104
|
+
//
|
105
|
+
// 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
|
106
|
+
//
|
107
|
+
// 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
|
108
|
+
// f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
|
109
|
+
// inserts a lot of unnecessary stuff.
|
110
|
+
//
|
111
|
+
// Copy and paste the output into the default templated function you wrote in Step 1.
|
112
|
+
// Fix it so it works as a template instead of just for doubles.
|
113
|
+
//
|
114
|
+
// Because of step 3, this will automatically also work for the nmatrix-atlas
|
115
|
+
// and nmatrix-lapacke implementations.
|
116
|
+
//
|
117
|
+
// 9. Write tests to confirm that it works for all data types.
|
118
|
+
//
|
119
|
+
// 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
|
120
|
+
// or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
|
121
|
+
//
|
122
|
+
// 11. Pull request!
|
123
|
+
|
124
|
+
/*
|
125
|
+
* Project Includes
|
126
|
+
*/
|
127
|
+
|
128
|
+
|
129
|
+
#include <ruby.h>
|
130
|
+
#include <algorithm>
|
131
|
+
#include <limits>
|
132
|
+
#include <cmath>
|
133
|
+
|
134
|
+
#include "math/cblas_enums.h"
|
135
|
+
|
136
|
+
#include "data/data.h"
|
137
|
+
#include "math/magnitude.h"
|
138
|
+
#include "math/imax.h"
|
139
|
+
#include "math/scal.h"
|
140
|
+
#include "math/laswp.h"
|
141
|
+
#include "math/trsm.h"
|
142
|
+
#include "math/gemm.h"
|
143
|
+
#include "math/gemv.h"
|
144
|
+
#include "math/asum.h"
|
145
|
+
#include "math/nrm2.h"
|
146
|
+
#include "math/getrf.h"
|
147
|
+
#include "math/getrs.h"
|
148
|
+
#include "math/rot.h"
|
149
|
+
#include "math/rotg.h"
|
150
|
+
#include "math/math.h"
|
151
|
+
#include "math/util.h"
|
152
|
+
#include "storage/dense/dense.h"
|
153
|
+
|
154
|
+
#include "nmatrix.h"
|
155
|
+
#include "ruby_constants.h"
|
156
|
+
|
157
|
+
/*
|
158
|
+
* Forward Declarations
|
159
|
+
*/
|
160
|
+
|
161
|
+
extern "C" {
|
162
|
+
/* BLAS Level 1. */
|
163
|
+
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
164
|
+
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
|
165
|
+
static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
|
166
|
+
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
|
167
|
+
static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
|
168
|
+
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
|
169
|
+
|
170
|
+
/* BLAS Level 2. */
|
171
|
+
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
172
|
+
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
173
|
+
|
174
|
+
/* BLAS Level 3. */
|
175
|
+
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
|
176
|
+
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
|
177
|
+
static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
178
|
+
VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
179
|
+
|
180
|
+
/* LAPACK. */
|
181
|
+
static VALUE nm_has_clapack(VALUE self);
|
182
|
+
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
|
183
|
+
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
|
184
|
+
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
|
185
|
+
} // end of extern "C" block
|
186
|
+
|
187
|
+
////////////////////
|
188
|
+
// Math Functions //
|
189
|
+
////////////////////
|
190
|
+
|
191
|
+
namespace nm {
|
192
|
+
namespace math {
|
193
|
+
|
194
|
+
/*
|
195
|
+
* Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
|
196
|
+
*/
|
197
|
+
template <typename DType>
|
198
|
+
void det_exact_from_dense(const int M, const void* A_elements, const int lda, void* result_arg) {
|
199
|
+
DType* result = reinterpret_cast<DType*>(result_arg);
|
200
|
+
const DType* A = reinterpret_cast<const DType*>(A_elements);
|
201
|
+
|
202
|
+
typename LongDType<DType>::type x, y;
|
203
|
+
|
204
|
+
if (M == 2) {
|
205
|
+
*result = A[0] * A[lda+1] - A[1] * A[lda];
|
206
|
+
} else if (M == 3) {
|
207
|
+
x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
|
208
|
+
y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
|
209
|
+
x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di)
|
210
|
+
|
211
|
+
y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
|
212
|
+
*result = A[2]*y + x; // c*(dh-eg) + _
|
213
|
+
} else if (M < 2) {
|
214
|
+
rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
|
215
|
+
} else {
|
216
|
+
rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
|
217
|
+
}
|
218
|
+
}
|
219
|
+
|
220
|
+
//we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive
|
221
|
+
template <>
|
222
|
+
void det_exact_from_dense<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
|
223
|
+
rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
|
224
|
+
}
|
225
|
+
/*
|
226
|
+
* Calculate the determinant for a yale matrix (storage) of size 2 or 3. Return the result.
|
227
|
+
*/
|
228
|
+
template <typename DType>
|
229
|
+
void det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, void* result_arg) {
|
230
|
+
DType* result = reinterpret_cast<DType*>(result_arg);
|
231
|
+
IType* ija = reinterpret_cast<IType *>(storage->ija);
|
232
|
+
DType* a = reinterpret_cast<DType*>(storage->a);
|
233
|
+
IType col_pos = storage->shape[0] + 1;
|
234
|
+
if (M == 2) {
|
235
|
+
if (ija[2] - ija[0] == 2) {
|
236
|
+
*result = a[0] * a[1] - a[col_pos] * a[col_pos+1];
|
237
|
+
}
|
238
|
+
else { *result = a[0] * a[1]; }
|
239
|
+
} else if (M == 3) {
|
240
|
+
DType m[3][3];
|
241
|
+
for (int i = 0; i < 3; ++i) {
|
242
|
+
m[i][i] = a[i];
|
243
|
+
switch(ija[i+1] - ija[i]) {
|
244
|
+
case 2:
|
245
|
+
m[i][ija[col_pos]] = a[col_pos];
|
246
|
+
m[i][ija[col_pos+1]] = a[col_pos+1];
|
247
|
+
col_pos += 2;
|
248
|
+
break;
|
249
|
+
case 1:
|
250
|
+
m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
|
251
|
+
m[i][ija[col_pos]] = a[col_pos];
|
252
|
+
++col_pos;
|
253
|
+
break;
|
254
|
+
case 0:
|
255
|
+
m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
|
256
|
+
break;
|
257
|
+
default:
|
258
|
+
rb_raise(rb_eArgError, "some value in IJA is incorrect!");
|
259
|
+
}
|
260
|
+
}
|
261
|
+
*result =
|
262
|
+
m[0][0] * m[1][1] * m[2][2] + m[0][1] * m[1][2] * m[2][0] + m[0][2] * m[1][0] * m[2][1]
|
263
|
+
- m[0][0] * m[1][2] * m[2][1] - m[0][1] * m[1][0] * m[2][2] - m[0][2] * m[1][1] * m[2][0];
|
264
|
+
|
265
|
+
} else if (M < 2) {
|
266
|
+
rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
|
267
|
+
} else {
|
268
|
+
rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
/*
|
273
|
+
* Solve a system of linear equations using forward-substution followed by
|
274
|
+
* back substution from the LU factorization of the matrix of co-efficients.
|
275
|
+
* Replaces x_elements with the result. Works only with non-integer, non-object
|
276
|
+
* data types.
|
277
|
+
*
|
278
|
+
* args - r -> The number of rows of the matrix.
|
279
|
+
* lu_elements -> Elements of the LU decomposition of the co-efficients
|
280
|
+
* matrix, as a contiguos array.
|
281
|
+
* b_elements -> Elements of the the right hand sides, as a contiguous array.
|
282
|
+
* x_elements -> The array that will contain the results of the computation.
|
283
|
+
* pivot -> Positions of permuted rows.
|
284
|
+
*/
|
285
|
+
template <typename DType>
|
286
|
+
void solve(const int r, const void* lu_elements, const void* b_elements, void* x_elements, const int* pivot) {
|
287
|
+
int ii = 0, ip;
|
288
|
+
DType sum;
|
289
|
+
|
290
|
+
const DType* matrix = reinterpret_cast<const DType*>(lu_elements);
|
291
|
+
const DType* b = reinterpret_cast<const DType*>(b_elements);
|
292
|
+
DType* x = reinterpret_cast<DType*>(x_elements);
|
293
|
+
|
294
|
+
for (int i = 0; i < r; ++i) { x[i] = b[i]; }
|
295
|
+
for (int i = 0; i < r; ++i) { // forward substitution loop
|
296
|
+
ip = pivot[i];
|
297
|
+
sum = x[ip];
|
298
|
+
x[ip] = x[i];
|
299
|
+
|
300
|
+
if (ii != 0) {
|
301
|
+
for (int j = ii - 1;j < i; ++j) { sum = sum - matrix[i * r + j] * x[j]; }
|
302
|
+
}
|
303
|
+
else if (sum != 0.0) {
|
304
|
+
ii = i + 1;
|
305
|
+
}
|
306
|
+
x[i] = sum;
|
307
|
+
}
|
308
|
+
|
309
|
+
for (int i = r - 1; i >= 0; --i) { // back substitution loop
|
310
|
+
sum = x[i];
|
311
|
+
for (int j = i + 1; j < r; j++) { sum = sum - matrix[i * r + j] * x[j]; }
|
312
|
+
x[i] = sum/matrix[i * r + i];
|
313
|
+
}
|
314
|
+
}
|
315
|
+
|
316
|
+
/*
|
317
|
+
* Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
|
318
|
+
* In-place inversion of the matrix saves on memory and time.
|
319
|
+
*
|
320
|
+
* args - M - Shape of the matrix
|
321
|
+
* a_elements - A duplicate of the original expressed as a contiguos array
|
322
|
+
*/
|
323
|
+
template <typename DType>
|
324
|
+
void inverse(const int M, void* a_elements) {
|
325
|
+
DType* matrix = reinterpret_cast<DType*>(a_elements);
|
326
|
+
int row_index[M]; // arrays for keeping track of column scrambling
|
327
|
+
int col_index[M];
|
328
|
+
|
329
|
+
for (int k = 0;k < M; ++k) {
|
330
|
+
typename MagnitudeDType<DType>::type akk;
|
331
|
+
akk = magnitude( matrix[k * (M + 1)] ); // diagonal element
|
332
|
+
|
333
|
+
int interchange = k;
|
334
|
+
|
335
|
+
for (int row = k + 1; row < M; ++row) {
|
336
|
+
typename MagnitudeDType<DType>::type big;
|
337
|
+
big = magnitude( matrix[M*row + k] ); // element below the temp pivot
|
338
|
+
|
339
|
+
if ( big > akk ) {
|
340
|
+
interchange = row;
|
341
|
+
akk = big;
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
if (interchange != k) { // check if rows need flipping
|
346
|
+
DType temp;
|
347
|
+
|
348
|
+
for (int col = 0; col < M; ++col) {
|
349
|
+
NM_SWAP(matrix[interchange*M + col], matrix[k*M + col], temp);
|
350
|
+
}
|
351
|
+
}
|
352
|
+
|
353
|
+
row_index[k] = interchange;
|
354
|
+
col_index[k] = k;
|
355
|
+
|
356
|
+
if (matrix[k * (M + 1)] == (DType)(0)) {
|
357
|
+
rb_raise(rb_eZeroDivError, "Expected Non-Singular Matrix.");
|
358
|
+
}
|
359
|
+
|
360
|
+
DType pivot = matrix[k * (M + 1)];
|
361
|
+
matrix[k * (M + 1)] = (DType)(1); // set diagonal as 1 for in-place inversion
|
362
|
+
|
363
|
+
for (int col = 0; col < M; ++col) {
|
364
|
+
// divide each element in the kth row with the pivot
|
365
|
+
matrix[k*M + col] = matrix[k*M + col] / pivot;
|
366
|
+
}
|
367
|
+
|
368
|
+
for (int kk = 0; kk < M; ++kk) { // iterate and reduce all rows
|
369
|
+
if (kk == k) continue;
|
370
|
+
|
371
|
+
DType dum = matrix[k + M*kk];
|
372
|
+
matrix[k + M*kk] = (DType)(0); // prepare for inplace inversion
|
373
|
+
for (int col = 0; col < M; ++col) {
|
374
|
+
matrix[M*kk + col] = matrix[M*kk + col] - matrix[M*k + col] * dum;
|
375
|
+
}
|
376
|
+
}
|
377
|
+
}
|
378
|
+
|
379
|
+
// Unscramble columns
|
380
|
+
DType temp;
|
381
|
+
|
382
|
+
for (int k = M - 1; k >= 0; --k) {
|
383
|
+
if (row_index[k] != col_index[k]) {
|
384
|
+
|
385
|
+
for (int row = 0; row < M; ++row) {
|
386
|
+
NM_SWAP(matrix[row * M + row_index[k]], matrix[row * M + col_index[k]],
|
387
|
+
temp);
|
388
|
+
}
|
389
|
+
}
|
390
|
+
}
|
391
|
+
}
|
392
|
+
|
393
|
+
/*
|
394
|
+
* Reduce a square matrix to hessenberg form with householder transforms
|
395
|
+
*
|
396
|
+
* == Arguments
|
397
|
+
*
|
398
|
+
* nrows - The number of rows present in matrix a.
|
399
|
+
* a_elements - Elements of the matrix to be reduced in 1D array form.
|
400
|
+
*
|
401
|
+
* == References
|
402
|
+
*
|
403
|
+
* http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
|
404
|
+
* This code has been included by permission of the author.
|
405
|
+
*/
|
406
|
+
template <typename DType>
|
407
|
+
void hessenberg(const int nrows, void* a_elements) {
|
408
|
+
DType* a = reinterpret_cast<DType*>(a_elements);
|
409
|
+
DType* u = new DType[nrows]; // auxillary storage for the chosen vector
|
410
|
+
DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
|
411
|
+
int i, k, col;
|
412
|
+
|
413
|
+
// For each column use a Householder transformation to zero all entries
|
414
|
+
// below the subdiagonal.
|
415
|
+
for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
|
416
|
+
col++) {
|
417
|
+
// Calculate the signed square root of the sum of squares of the
|
418
|
+
// elements below the diagonal.
|
419
|
+
|
420
|
+
for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
|
421
|
+
p_a += nrows, i++) {
|
422
|
+
sum_of_squares += *p_a * *p_a;
|
423
|
+
}
|
424
|
+
if (sum_of_squares == 0.0) { continue; }
|
425
|
+
sum_of_squares = std::sqrt(sum_of_squares);
|
426
|
+
|
427
|
+
if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }
|
428
|
+
|
429
|
+
// Calculate the Householder transformation Q = I - 2uu'/u'u.
|
430
|
+
u[col + 1] = *psubdiag - sum_of_squares;
|
431
|
+
*psubdiag = sum_of_squares;
|
432
|
+
|
433
|
+
for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
|
434
|
+
u[i] = *p_a;
|
435
|
+
*p_a = 0.0;
|
436
|
+
}
|
437
|
+
|
438
|
+
// Premultiply A by Q
|
439
|
+
scale = -1.0 / (sum_of_squares * u[col+1]);
|
440
|
+
for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
|
441
|
+
p_a = a + nrows * (col + 1) + i;
|
442
|
+
for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
|
443
|
+
innerproduct += u[k] * *p_a;
|
444
|
+
}
|
445
|
+
innerproduct *= scale;
|
446
|
+
for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
|
447
|
+
*p_a -= u[k] * innerproduct;
|
448
|
+
}
|
449
|
+
}
|
450
|
+
|
451
|
+
// Postmultiply QA by Q
|
452
|
+
for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
|
453
|
+
for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
|
454
|
+
innerproduct += u[k] * *(p_row + k);
|
455
|
+
}
|
456
|
+
innerproduct *= scale;
|
457
|
+
|
458
|
+
for (k = col + 1; k < nrows; k++) {
|
459
|
+
*(p_row + k) -= u[k] * innerproduct;
|
460
|
+
}
|
461
|
+
}
|
462
|
+
}
|
463
|
+
|
464
|
+
delete[] u;
|
465
|
+
}
|
466
|
+
|
467
|
+
void raise_not_invertible_error() {
|
468
|
+
rb_raise(nm_eNotInvertibleError,
|
469
|
+
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
|
470
|
+
}
|
471
|
+
|
472
|
+
/*
|
473
|
+
* Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
|
474
|
+
*/
|
475
|
+
template <typename DType>
|
476
|
+
void inverse_exact_from_dense(const int M, const void* A_elements,
|
477
|
+
const int lda, void* B_elements, const int ldb) {
|
478
|
+
|
479
|
+
const DType* A = reinterpret_cast<const DType*>(A_elements);
|
480
|
+
DType* B = reinterpret_cast<DType*>(B_elements);
|
481
|
+
|
482
|
+
if (M == 2) {
|
483
|
+
DType det = A[0] * A[lda+1] - A[1] * A[lda];
|
484
|
+
if (det == 0) { raise_not_invertible_error(); }
|
485
|
+
B[0] = A[lda+1] / det;
|
486
|
+
B[1] = -A[1] / det;
|
487
|
+
B[ldb] = -A[lda] / det;
|
488
|
+
B[ldb+1] = A[0] / det;
|
489
|
+
|
490
|
+
} else if (M == 3) {
|
491
|
+
// Calculate the exact determinant.
|
492
|
+
DType det;
|
493
|
+
det_exact_from_dense<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
|
494
|
+
if (det == 0) { raise_not_invertible_error(); }
|
495
|
+
|
496
|
+
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
|
497
|
+
B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
|
498
|
+
B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
|
499
|
+
B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
|
500
|
+
B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
|
501
|
+
B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
|
502
|
+
B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
|
503
|
+
B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
|
504
|
+
B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
|
505
|
+
} else if (M == 1) {
|
506
|
+
B[0] = 1 / A[0];
|
507
|
+
} else {
|
508
|
+
rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
template <typename DType>
|
513
|
+
void inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
|
514
|
+
const int lda, YALE_STORAGE* inverse, const int ldb) {
|
515
|
+
|
516
|
+
// inverse is a clone of storage
|
517
|
+
const DType* a = reinterpret_cast<const DType*>(storage->a);
|
518
|
+
const IType* ija = reinterpret_cast<const IType *>(storage->ija);
|
519
|
+
DType* b = reinterpret_cast<DType*>(inverse->a);
|
520
|
+
IType* ijb = reinterpret_cast<IType *>(inverse->ija);
|
521
|
+
IType col_pos = storage->shape[0] + 1;
|
522
|
+
// Calculate the exact determinant.
|
523
|
+
DType det;
|
524
|
+
|
525
|
+
if (M == 2) {
|
526
|
+
IType ndnz = ija[2] - ija[0];
|
527
|
+
if (ndnz == 2) {
|
528
|
+
det = a[0] * a[1] - a[col_pos] * a[col_pos+1];
|
529
|
+
}
|
530
|
+
else { det = a[0] * a[1]; }
|
531
|
+
if (det == 0) { raise_not_invertible_error(); }
|
532
|
+
b[0] = a[1] / det;
|
533
|
+
b[1] = a[0] / det;
|
534
|
+
if (ndnz == 2) {
|
535
|
+
b[col_pos] = -a[col_pos] / det;
|
536
|
+
b[col_pos+1] = -a[col_pos+1] / det;
|
537
|
+
}
|
538
|
+
else if (ndnz == 1) {
|
539
|
+
b[col_pos] = -a[col_pos] / det;
|
540
|
+
}
|
541
|
+
|
542
|
+
} else if (M == 3) {
|
543
|
+
DType *A = new DType[lda*3];
|
544
|
+
for (int i = 0; i < lda; ++i) {
|
545
|
+
A[i*3+i] = a[i];
|
546
|
+
switch (ija[i+1] - ija[i]) {
|
547
|
+
case 2:
|
548
|
+
A[i*3 + ija[col_pos]] = a[col_pos];
|
549
|
+
A[i*3 + ija[col_pos+1]] = a[col_pos+1];
|
550
|
+
col_pos += 2;
|
551
|
+
break;
|
552
|
+
case 1:
|
553
|
+
A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
|
554
|
+
A[i*3 + ija[col_pos]] = a[col_pos];
|
555
|
+
col_pos += 1;
|
556
|
+
break;
|
557
|
+
case 0:
|
558
|
+
A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
|
559
|
+
break;
|
560
|
+
default:
|
561
|
+
rb_raise(rb_eArgError, "some value in IJA is incorrect!");
|
562
|
+
}
|
563
|
+
}
|
564
|
+
det =
|
565
|
+
A[0] * A[lda+1] * A[2*lda+2] + A[1] * A[lda+2] * A[2*lda] + A[2] * A[lda] * A[2*lda+1]
|
566
|
+
- A[0] * A[lda+2] * A[2*lda+1] - A[1] * A[lda] * A[2*lda+2] - A[2] * A[lda+1] * A[2*lda];
|
567
|
+
if (det == 0) { raise_not_invertible_error(); }
|
568
|
+
|
569
|
+
DType *B = new DType[3*ldb];
|
570
|
+
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
|
571
|
+
B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
|
572
|
+
B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
|
573
|
+
B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
|
574
|
+
B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
|
575
|
+
B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
|
576
|
+
B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
|
577
|
+
B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
|
578
|
+
B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
|
579
|
+
|
580
|
+
// Calculate the size of ijb and b, then reallocate them.
|
581
|
+
IType ndnz = 0;
|
582
|
+
for (int i = 0; i < 3; ++i) {
|
583
|
+
for (int j = 0; j < 3; ++j) {
|
584
|
+
if (j != i && B[i*ldb + j] != 0) { ++ndnz; }
|
585
|
+
}
|
586
|
+
}
|
587
|
+
inverse->ndnz = ndnz;
|
588
|
+
col_pos = 4; // shape[0] + 1
|
589
|
+
inverse->capacity = 4 + ndnz;
|
590
|
+
NM_REALLOC_N(inverse->a, DType, 4 + ndnz);
|
591
|
+
NM_REALLOC_N(inverse->ija, IType, 4 + ndnz);
|
592
|
+
b = reinterpret_cast<DType*>(inverse->a);
|
593
|
+
ijb = reinterpret_cast<IType *>(inverse->ija);
|
594
|
+
|
595
|
+
for (int i = 0; i < 3; ++i) {
|
596
|
+
ijb[i] = col_pos;
|
597
|
+
for (int j = 0; j < 3; ++j) {
|
598
|
+
if (j == i) {
|
599
|
+
b[i] = B[i*ldb + j];
|
600
|
+
}
|
601
|
+
else if (B[i*ldb + j] != 0) {
|
602
|
+
b[col_pos] = B[i*ldb + j];
|
603
|
+
ijb[col_pos] = j;
|
604
|
+
++col_pos;
|
605
|
+
}
|
606
|
+
}
|
607
|
+
}
|
608
|
+
b[3] = 0;
|
609
|
+
ijb[3] = col_pos;
|
610
|
+
delete [] B;
|
611
|
+
delete [] A;
|
612
|
+
} else if (M == 1) {
|
613
|
+
b[0] = 1 / a[0];
|
614
|
+
} else {
|
615
|
+
rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
|
616
|
+
}
|
617
|
+
}
|
618
|
+
|
619
|
+
/*
 * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
 *
 * Erases the element type behind void* so the function can sit in a dtype-indexed
 * dispatch table; forwards to the typed nm::math::gemm implementation.
 *
 * For documentation: http://www.netlib.org/blas/dgemm.f
 */
template <typename DType>
inline static void cblas_gemm(const enum CBLAS_ORDER order,
                              const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
                              int m, int n, int k,
                              void* alpha,
                              void* a, int lda,
                              void* b, int ldb,
                              void* beta,
                              void* c, int ldc)
{
  gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
              reinterpret_cast<DType*>(a), lda,
              reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
              reinterpret_cast<DType*>(c), ldc);
}
|
639
|
+
|
640
|
+
|
641
|
+
/*
 * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
 *
 * Erases the element type behind void* so the function can sit in a dtype-indexed
 * dispatch table; forwards to the typed nm::math::gemv implementation and
 * propagates its boolean success flag.
 *
 * For documentation: http://www.netlib.org/blas/dgemv.f
 */
template <typename DType>
inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans,
                              const int m, const int n,
                              const void* alpha,
                              const void* a, const int lda,
                              const void* x, const int incx,
                              const void* beta,
                              void* y, const int incy)
{
  return gemv<DType>(trans,
                     m, n, reinterpret_cast<const DType*>(alpha),
                     reinterpret_cast<const DType*>(a), lda,
                     reinterpret_cast<const DType*>(x), incx, reinterpret_cast<const DType*>(beta),
                     reinterpret_cast<DType*>(y), incy);
}
|
661
|
+
|
662
|
+
|
663
|
+
/*
 * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
 *
 * Erases the element type behind void* for dtype-table dispatch. Note that the
 * underlying nm::math::trsm takes alpha by value, so it is dereferenced here.
 *
 * For documentation: http://www.netlib.org/blas/dtrsm.f
 */
template <typename DType>
inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
                              const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
                              const int m, const int n, const void* alpha, const void* a,
                              const int lda, void* b, const int ldb)
{
  trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
              reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
}
|
677
|
+
|
678
|
+
}
|
679
|
+
} // end of namespace nm::math
|
680
|
+
|
681
|
+
|
682
|
+
extern "C" {
|
683
|
+
|
684
|
+
///////////////////
|
685
|
+
// Ruby Bindings //
|
686
|
+
///////////////////
|
687
|
+
|
688
|
+
/*
 * Register the NMatrix::Internal::LAPACK and NMatrix::Internal::BLAS Ruby modules
 * and bind their singleton methods to the internal C implementations.
 * Called once during extension initialization.
 */
void nm_math_init_blas() {
  VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");

  // Feature probe so Ruby code can branch without rescuing exceptions.
  rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);

  VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");

  /* ATLAS-CLAPACK Functions that are implemented internally */
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);

  VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");

  // BLAS level 1 routines.
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);

  // BLAS level 2/3 routines.
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
}
|
713
|
+
|
714
|
+
/*
 * call-seq:
 *     NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
 *
 * BLAS level 1 function +scal+. Works with all dtypes.
 *
 * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
 * follows:
 *  x <- alpha * x
 *
 * - +n+ -> Number of elements of +vector+.
 * - +alpha+ -> Scalar value used in the operation.
 * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
 * - +inc+ -> Increment used in the scaling function. Should generally be 1.
 */
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
  nm::dtype_t dtype = NM_DTYPE(vector);

  // Convert the Ruby alpha into a raw scalar of the vector's dtype.
  void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, scalar);

  // Per-dtype dispatch table generated by macro; every dtype is supported.
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_scal, void, const int n,
                             const void* scalar, void* x, const int incx);

  ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
                FIX2INT(incx));

  return vector;
}
|
743
|
+
|
744
|
+
/*
 * Call any of the cblas_xrotg functions as directly as possible.
 *
 * xROTG computes the elements of a Givens plane rotation matrix such that:
 *
 *  |  c s |   | a |   | r |
 *  | -s c | * | b | = | 0 |
 *
 * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
 *
 * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
 *
 * This function differs from most of the other raw BLAS accessors. Instead of
 * providing a, b, c, s as arguments, you should only provide a and b (the
 * inputs), and you should provide them as the first two elements of any dense
 * NMatrix type.
 *
 * The outputs [c,s] will be returned in a Ruby Array at the end; the input
 * NMatrix will also be modified in-place.
 *
 * This function, like the other cblas_ functions, does minimal type-checking.
 */
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
  // Per-dtype dispatch table; NULL entries mean the dtype is unsupported and
  // an exception is raised below.
  static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
      NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
      nm::math::cblas_rotg<float>,
      nm::math::cblas_rotg<double>,
      nm::math::cblas_rotg<nm::Complex64>,
      nm::math::cblas_rotg<nm::Complex128>,
      NULL //nm::math::cblas_rotg<nm::RubyObject>
  };

  nm::dtype_t dtype = NM_DTYPE(ab);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil; // unreachable; rb_raise does not return

  } else {
    // Protect Ruby values from GC while we hold raw pointers into them.
    NM_CONSERVATIVE(nm_register_value(&self));
    NM_CONSERVATIVE(nm_register_value(&ab));
    void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
         *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);

    // extract A and B from the NVector (first two elements)
    void* pA = NM_STORAGE_DENSE(ab)->elements;
    void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
    // c and s are output

    ttable[dtype](pA, pB, pC, pS);

    VALUE result = rb_ary_new2(2);

    if (dtype == nm::RUBYOBJ) {
      // Dead branch in practice (RUBYOBJ entry is NULL above), kept for safety.
      rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
      rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
    } else {
      rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
      rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
    }
    // Unregister in reverse order of registration.
    NM_CONSERVATIVE(nm_unregister_value(&ab));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    return result;
  }
}
|
809
|
+
|
810
|
+
|
811
|
+
/*
 * Call any of the cblas_xrot functions as directly as possible.
 *
 * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
 *
 * It's tough to find documentation on xROT. Here are what we think the arguments are for:
 *  * n     :: number of elements to consider in x and y
 *  * x     :: a vector (expects an NVector)
 *  * incx  :: stride of x
 *  * y     :: a vector (expects an NVector)
 *  * incy  :: stride of y
 *  * c     :: cosine of the angle of rotation
 *  * s     :: sine of the angle of rotation
 *
 * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
 * will be float for Complex64 or double for Complex128.
 *
 * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
  // Per-dtype dispatch table. Second template parameter is the c/s scalar type,
  // which differs from the element type for complex dtypes.
  static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
      NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
      nm::math::cblas_rot<float,float>,
      nm::math::cblas_rot<double,double>,
      nm::math::cblas_rot<nm::Complex64,float>,
      nm::math::cblas_rot<nm::Complex128,double>,
      nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
  };

  nm::dtype_t dtype = NM_DTYPE(x);


  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qfalse; // unreachable; rb_raise does not return
  } else {
    void *pC, *pS;

    // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
    if (dtype == nm::COMPLEX64) {
      pC = NM_ALLOCA_N(float,1);
      pS = NM_ALLOCA_N(float,1);
      rubyval_to_cval(c, nm::FLOAT32, pC);
      rubyval_to_cval(s, nm::FLOAT32, pS);
    } else if (dtype == nm::COMPLEX128) {
      pC = NM_ALLOCA_N(double,1);
      pS = NM_ALLOCA_N(double,1);
      rubyval_to_cval(c, nm::FLOAT64, pC);
      rubyval_to_cval(s, nm::FLOAT64, pS);
    } else {
      pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
      pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
      rubyval_to_cval(c, dtype, pC);
      rubyval_to_cval(s, dtype, pS);
    }


    // Rotation is applied in-place to x and y.
    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);

    return Qtrue;
  }
}
|
877
|
+
|
878
|
+
|
879
|
+
/*
 * Call any of the cblas_xnrm2 functions as directly as possible.
 *
 * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
 *
 * Arguments:
 *  * n     :: length of x, must be at least 0
 *  * x     :: pointer to first entry of input vector
 *  * incx  :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
 *
 * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {

  // Per-dtype dispatch table; NULL entries raise below.
  static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
      NULL, NULL, NULL, NULL, NULL, // no help for integers
      nm::math::cblas_nrm2<float32_t>,
      nm::math::cblas_nrm2<float64_t>,
      nm::math::cblas_nrm2<nm::Complex64>,
      nm::math::cblas_nrm2<nm::Complex128>,
      nm::math::cblas_nrm2<nm::RubyObject>
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil; // unreachable; rb_raise does not return

  } else {
    // Determine the return dtype and allocate it.
    // The norm of a complex vector is real, so complex inputs return floats.
    nm::dtype_t rdtype = dtype;
    if      (dtype == nm::COMPLEX64)  rdtype = nm::FLOAT32;
    else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;

    void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);

    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);

    return nm::rubyobj_from_cval(Result, rdtype).rval;
  }
}
|
925
|
+
|
926
|
+
|
927
|
+
|
928
|
+
/*
 * Call any of the cblas_xasum functions as directly as possible.
 *
 * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
 * of a vector x.
 *
 * Arguments:
 *  * n     :: length of x, must be at least 0
 *  * x     :: pointer to first entry of input vector
 *  * incx  :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
 *
 * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {

  // Per-dtype dispatch table; unlike nrm2, integer dtypes are supported here.
  static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
      nm::math::cblas_asum<uint8_t>,
      nm::math::cblas_asum<int8_t>,
      nm::math::cblas_asum<int16_t>,
      nm::math::cblas_asum<int32_t>,
      nm::math::cblas_asum<int64_t>,
      nm::math::cblas_asum<float32_t>,
      nm::math::cblas_asum<float64_t>,
      nm::math::cblas_asum<nm::Complex64>,
      nm::math::cblas_asum<nm::Complex128>,
      nm::math::cblas_asum<nm::RubyObject>
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  // Determine the return dtype and allocate it.
  // The absolute-value sum of a complex vector is real.
  nm::dtype_t rdtype = dtype;
  if      (dtype == nm::COMPLEX64)  rdtype = nm::FLOAT32;
  else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;

  void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);

  ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);

  return nm::rubyobj_from_cval(Result, rdtype).rval;
}
|
973
|
+
|
974
|
+
/*
 * call-seq:
 *     NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
 *
 * BLAS level 1 routine.
 *
 * Return the index of the largest element of +vector+.
 *
 * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
 * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
 * - +inc+ -> It's the increment used when searching. Use 1 except if you know
 *   what you're doing.
 */
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
  // All dtypes supported; table generated by macro.
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_imax, int, const int n, const void* x, const int incx);

  nm::dtype_t dtype = NM_DTYPE(x);

  int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));

  // Convert to Ruby's Int value.
  return INT2FIX(index);
}
|
997
|
+
|
998
|
+
|
999
|
+
/* Call any of the cblas_xgemm functions as directly as possible.
 *
 * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
 *
 *    C = alpha*op(A)*op(B) + beta*C
 *
 * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
 *
 * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
 * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
 * expose the ultra-optimized ATLAS versions.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dgemm.f
 *
 * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_gemm(VALUE self,
                           VALUE order,
                           VALUE trans_a, VALUE trans_b,
                           VALUE m, VALUE n, VALUE k,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE b, VALUE ldb,
                           VALUE beta,
                           VALUE c, VALUE ldc)
{
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);

  nm::dtype_t dtype = NM_DTYPE(a);

  // Convert the Ruby scalars into raw values of the matrices' dtype.
  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);
  rubyval_to_cval(beta, dtype, pBeta);

  ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));

  // c is modified in-place and returned.
  return c;
}
|
1043
|
+
|
1044
|
+
|
1045
|
+
/* Call any of the cblas_xgemv functions as directly as possible.
 *
 * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
 *
 *    y = alpha*op(A)*x + beta*y
 *
 * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
 *
 * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
 * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
 * expose the ultra-optimized ATLAS versions.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dgemm.f
 *
 * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_gemv(VALUE self,
                           VALUE trans_a,
                           VALUE m, VALUE n,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE x, VALUE incx,
                           VALUE beta,
                           VALUE y, VALUE incy)
{
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)

  nm::dtype_t dtype = NM_DTYPE(a);

  // Convert the Ruby scalars into raw values of the matrices' dtype.
  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);
  rubyval_to_cval(beta, dtype, pBeta);

  // y is modified in-place; the boolean result reports success.
  return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
}
|
1086
|
+
|
1087
|
+
|
1088
|
+
/*
 * Call any of the cblas_xtrsm functions as directly as possible: solve the
 * triangular system op(A)*X = alpha*B (or X*op(A) = alpha*B, depending on
 * +side+), overwriting B with the solution X.
 *
 * See: http://www.netlib.org/blas/dtrsm.f
 *
 * Like the other cblas_ accessors, this does almost no type checking.
 */
static VALUE nm_cblas_trsm(VALUE self,
                           VALUE order,
                           VALUE side, VALUE uplo,
                           VALUE trans_a, VALUE diag,
                           VALUE m, VALUE n,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE b, VALUE ldb)
{
  // Per-dtype dispatch table; NULL entries raise below.
  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
                                        const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
                                        const int m, const int n, const void* alpha, const void* a,
                                        const int lda, void* b, const int ldb) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::cblas_trsm<float>,
      nm::math::cblas_trsm<double>,
      nm::math::cblas_trsm<nm::Complex64>,
      nm::math::cblas_trsm<nm::Complex128>,
      nm::math::cblas_trsm<nm::RubyObject>
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Convert the Ruby alpha into a raw scalar of the matrices' dtype.
    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
    rubyval_to_cval(alpha, dtype, pAlpha);

    ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  return Qtrue;
}
|
1122
|
+
|
1123
|
+
/* Call any of the clapack_xgetrf functions as directly as possible.
 *
 * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
 * matrix A using partial pivoting with row interchanges.
 *
 * The factorization has the form:
 *    A = P * L * U
 * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
 * and U is upper triangular (upper trapezoidal if m < n).
 *
 * This is the right-looking level 3 BLAS version of the algorithm.
 *
 * == Arguments
 * See: http://www.netlib.org/lapack/double/dgetrf.f
 * (You don't need argument 5; this is the value returned by this function.)
 *
 * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 *
 * Returns an array giving the pivot indices (normally these are argument #5).
 */
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
  // Per-dtype dispatch table; NULL entries raise below.
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::clapack_getrf<float>,
      nm::math::clapack_getrf<double>,
      nm::math::clapack_getrf<nm::Complex64>,
      nm::math::clapack_getrf<nm::Complex128>,
      nm::math::clapack_getrf<nm::RubyObject>
  };

  int M = FIX2INT(m),
      N = FIX2INT(n);

  // Allocate the pivot index array, which is of size MIN(M, N).
  size_t ipiv_size = std::min(M,N);
  int* ipiv = NM_ALLOCA_N(int, ipiv_size);

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of getrf or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
  }

  // Result will be stored in a. We return ipiv as an array.
  VALUE ipiv_array = rb_ary_new2(ipiv_size);
  for (size_t i = 0; i < ipiv_size; ++i) {
    rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
  }

  return ipiv_array;
}
|
1179
|
+
|
1180
|
+
|
1181
|
+
/*
 * Call any of the clapack_xgetrs functions as directly as possible: solve a
 * system of linear equations A*X = B (or A**T*X = B) using the LU factorization
 * previously computed by getrf. B is overwritten with the solution and returned.
 */
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
  // Per-dtype dispatch table; NULL entries raise below.
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
                                       const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
                                       const int ldb) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::clapack_getrs<float>,
      nm::math::clapack_getrs<double>,
      nm::math::clapack_getrs<nm::Complex64>,
      nm::math::clapack_getrs<nm::Complex128>,
      nm::math::clapack_getrs<nm::RubyObject>
  };

  // Allocate the C version of the pivot index array
  int* ipiv_;
  if (!RB_TYPE_P(ipiv, T_ARRAY)) {
    rb_raise(rb_eArgError, "ipiv must be of type Array"); // does not return
  } else {
    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
      ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
    }
  }

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {

    // Call either our version of getrs or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
                        ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  // b is both returned and modified directly in the argument list.
  return b;
}
|
1219
|
+
|
1220
|
+
/*
 * Simple way to check from within Ruby code if clapack functions are available, without
 * having to wait around for an exception to be thrown.
 *
 * This build always reports false: only the internal (non-ATLAS) implementations
 * are compiled in here.
 */
static VALUE nm_has_clapack(VALUE self) {
  return Qfalse;
}
|
1227
|
+
|
1228
|
+
/*
 * Call any of the clapack_xlaswp functions as directly as possible: perform a
 * series of row interchanges on matrix +a+ as described by +ipiv+.
 *
 * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
 * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
 */
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
  // Per-dtype dispatch table; all dtypes are supported (row swaps need no arithmetic).
  static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
      nm::math::clapack_laswp<uint8_t>,
      nm::math::clapack_laswp<int8_t>,
      nm::math::clapack_laswp<int16_t>,
      nm::math::clapack_laswp<int32_t>,
      nm::math::clapack_laswp<int64_t>,
      nm::math::clapack_laswp<float>,
      nm::math::clapack_laswp<double>,
      nm::math::clapack_laswp<nm::Complex64>,
      nm::math::clapack_laswp<nm::Complex128>,
      nm::math::clapack_laswp<nm::RubyObject>
  };

  // Allocate the C version of the pivot index array
  int* ipiv_;
  if (!RB_TYPE_P(ipiv, T_ARRAY)) {
    rb_raise(rb_eArgError, "ipiv must be of type Array"); // does not return
  } else {
    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
      ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
    }
  }

  // Call either our version of laswp or the LAPACK version.
  ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));

  // a is both returned and modified directly in the argument list.
  return a;
}
|
1265
|
+
|
1266
|
+
|
1267
|
+
/*
 * C accessor for calculating an exact determinant. Dense matrix version.
 *
 * M is the matrix dimension, elements the dense storage, lda its leading
 * dimension. The determinant is written into *result as a value of +dtype+.
 */
void nm_math_det_exact_from_dense(const int M, const void* elements, const int lda,
                                  nm::dtype_t dtype, void* result) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_dense, void, const int M,
                             const void* A_elements, const int lda, void* result_arg);

  ttable[dtype](M, elements, lda, result);
}
|
1277
|
+
|
1278
|
+
/*
 * C accessor for calculating an exact determinant. Yale matrix version.
 *
 * Same contract as the dense version, but reads from Yale (sparse) storage.
 */
void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda,
                                 nm::dtype_t dtype, void* result) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_yale, void, const int M,
                             const YALE_STORAGE* storage, const int lda, void* result_arg);

  ttable[dtype](M, storage, lda, result);
}
|
1288
|
+
|
1289
|
+
/*
|
1290
|
+
* C accessor for solving a system of linear equations.
|
1291
|
+
*/
|
1292
|
+
void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv) {
|
1293
|
+
int* pivot = new int[RARRAY_LEN(ipiv)];
|
1294
|
+
|
1295
|
+
for (int i = 0; i < RARRAY_LEN(ipiv); ++i) {
|
1296
|
+
pivot[i] = FIX2INT(rb_ary_entry(ipiv, i));
|
1297
|
+
}
|
1298
|
+
|
1299
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::solve, void, const int, const void*, const void*, void*, const int*);
|
1300
|
+
|
1301
|
+
ttable[NM_DTYPE(x)](NM_SHAPE0(b), NM_STORAGE_DENSE(lu)->elements,
|
1302
|
+
NM_STORAGE_DENSE(b)->elements, NM_STORAGE_DENSE(x)->elements, pivot);
|
1303
|
+
}
|
1304
|
+
|
1305
|
+
/*
 * C accessor for reducing a matrix to hessenberg form.
 *
 * Operates in-place on the dense elements of +a+. Only float32/float64 are
 * supported; other dtypes have NULL table entries and calling with them would
 * crash — callers are expected to have checked the dtype.
 */
void nm_math_hessenberg(VALUE a) {
  static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
      NULL, NULL, NULL, NULL, NULL, // does not support ints
      nm::math::hessenberg<float>,
      nm::math::hessenberg<double>,
      NULL, NULL, // does not support Complex
      NULL // no support for Ruby Object
  };

  ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
}
|
1319
|
+
/*
 * C accessor for calculating an in-place inverse.
 *
 * M is the matrix dimension; a_elements (dense storage of dtype +dtype+) is
 * overwritten with its inverse.
 */
void nm_math_inverse(const int M, void* a_elements, nm::dtype_t dtype) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse, void, const int, void*);

  ttable[dtype](M, a_elements);
}
|
1327
|
+
|
1328
|
+
/*
|
1329
|
+
* C accessor for calculating an exact inverse. Dense matrix version.
|
1330
|
+
*/
|
1331
|
+
void nm_math_inverse_exact_from_dense(const int M, const void* A_elements,
|
1332
|
+
const int lda, void* B_elements, const int ldb, nm::dtype_t dtype) {
|
1333
|
+
|
1334
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_dense, void,
|
1335
|
+
const int, const void*, const int, void*, const int);
|
1336
|
+
|
1337
|
+
ttable[dtype](M, A_elements, lda, B_elements, ldb);
|
1338
|
+
}
|
1339
|
+
|
1340
|
+
/*
|
1341
|
+
* C accessor for calculating an exact inverse. Yale matrix version.
|
1342
|
+
*/
|
1343
|
+
void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
|
1344
|
+
const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype) {
|
1345
|
+
|
1346
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_yale, void,
|
1347
|
+
const int, const YALE_STORAGE*, const int, YALE_STORAGE*, const int);
|
1348
|
+
|
1349
|
+
ttable[dtype](M, storage, lda, inverse, ldb);
|
1350
|
+
}
|
1351
|
+
|
1352
|
+
/*
 * Transpose an array of elements that represent a row-major dense matrix. Does not allocate anything, only does an memcpy.
 *
 * A is read as an M-row, N-column matrix with row stride lda; B receives the
 * N-row, M-column transpose with row stride ldb. element_size is the raw byte
 * width of one element, so any dtype can be moved without knowing its type.
 */
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
  const char* src = reinterpret_cast<const char*>(A);
  char*       dst = reinterpret_cast<char*>(B);

  // dst[col][row] <- src[row][col], element_size bytes at a time.
  for (size_t col = 0; col < N; ++col) {
    for (size_t row = 0; row < M; ++row) {
      memcpy(dst + (col * ldb + row) * element_size,
             src + (row * lda + col) * element_size,
             element_size);
    }
  }
}
} // end of extern "C" block
|