nmatrix 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +27 -0
- data/.rspec +2 -0
- data/Gemfile +3 -5
- data/Guardfile +6 -0
- data/History.txt +33 -0
- data/Manifest.txt +41 -38
- data/README.rdoc +88 -11
- data/Rakefile +35 -53
- data/ext/nmatrix/data/complex.h +372 -0
- data/ext/nmatrix/data/data.cpp +275 -0
- data/ext/nmatrix/data/data.h +707 -0
- data/ext/nmatrix/data/rational.h +421 -0
- data/ext/nmatrix/data/ruby_object.h +446 -0
- data/ext/nmatrix/extconf.rb +101 -51
- data/ext/nmatrix/new_extconf.rb +56 -0
- data/ext/nmatrix/nmatrix.cpp +1609 -0
- data/ext/nmatrix/nmatrix.h +265 -849
- data/ext/nmatrix/ruby_constants.cpp +134 -0
- data/ext/nmatrix/ruby_constants.h +103 -0
- data/ext/nmatrix/storage/common.cpp +70 -0
- data/ext/nmatrix/storage/common.h +170 -0
- data/ext/nmatrix/storage/dense.cpp +665 -0
- data/ext/nmatrix/storage/dense.h +116 -0
- data/ext/nmatrix/storage/list.cpp +1088 -0
- data/ext/nmatrix/storage/list.h +129 -0
- data/ext/nmatrix/storage/storage.cpp +658 -0
- data/ext/nmatrix/storage/storage.h +99 -0
- data/ext/nmatrix/storage/yale.cpp +1601 -0
- data/ext/nmatrix/storage/yale.h +208 -0
- data/ext/nmatrix/ttable_helper.rb +126 -0
- data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
- data/ext/nmatrix/util/io.cpp +295 -0
- data/ext/nmatrix/util/io.h +117 -0
- data/ext/nmatrix/util/lapack.h +1175 -0
- data/ext/nmatrix/util/math.cpp +557 -0
- data/ext/nmatrix/util/math.h +1363 -0
- data/ext/nmatrix/util/sl_list.cpp +475 -0
- data/ext/nmatrix/util/sl_list.h +255 -0
- data/ext/nmatrix/util/util.h +78 -0
- data/lib/nmatrix/blas.rb +70 -0
- data/lib/nmatrix/io/mat5_reader.rb +567 -0
- data/lib/nmatrix/io/mat_reader.rb +162 -0
- data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
- data/lib/nmatrix/nmatrix.rb +199 -0
- data/lib/nmatrix/nvector.rb +103 -0
- data/lib/nmatrix/version.rb +27 -0
- data/lib/nmatrix.rb +22 -230
- data/nmatrix.gemspec +59 -0
- data/scripts/mac-brew-gcc.sh +47 -0
- data/spec/4x4_sparse.mat +0 -0
- data/spec/4x5_dense.mat +0 -0
- data/spec/blas_spec.rb +47 -0
- data/spec/elementwise_spec.rb +164 -0
- data/spec/io_spec.rb +60 -0
- data/spec/lapack_spec.rb +52 -0
- data/spec/math_spec.rb +96 -0
- data/spec/nmatrix_spec.rb +93 -89
- data/spec/nmatrix_yale_spec.rb +52 -36
- data/spec/nvector_spec.rb +1 -1
- data/spec/slice_spec.rb +257 -0
- data/spec/spec_helper.rb +51 -0
- data/spec/utm5940.mtx +83844 -0
- metadata +113 -71
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/ext/nmatrix/cblas.c +0 -150
- data/ext/nmatrix/dense/blas_header.template.c +0 -52
- data/ext/nmatrix/dense/elementwise.template.c +0 -107
- data/ext/nmatrix/dense/gemm.template.c +0 -159
- data/ext/nmatrix/dense/gemv.template.c +0 -130
- data/ext/nmatrix/dense/rationalmath.template.c +0 -68
- data/ext/nmatrix/dense.c +0 -307
- data/ext/nmatrix/depend +0 -18
- data/ext/nmatrix/generator/syntax_tree.rb +0 -481
- data/ext/nmatrix/generator.rb +0 -594
- data/ext/nmatrix/list.c +0 -774
- data/ext/nmatrix/nmatrix.c +0 -1977
- data/ext/nmatrix/rational.c +0 -98
- data/ext/nmatrix/yale/complexmath.template.c +0 -71
- data/ext/nmatrix/yale/elementwise.template.c +0 -46
- data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
- data/ext/nmatrix/yale/numbmm.template.c +0 -94
- data/ext/nmatrix/yale/smmp1.template.c +0 -21
- data/ext/nmatrix/yale/smmp2.template.c +0 -43
- data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
- data/ext/nmatrix/yale/sort_columns.template.c +0 -56
- data/ext/nmatrix/yale/symbmm.template.c +0 -54
- data/ext/nmatrix/yale/transp.template.c +0 -68
- data/ext/nmatrix/yale.c +0 -726
- data/lib/array.rb +0 -67
- data/spec/syntax_tree_spec.rb +0 -46
@@ -0,0 +1,557 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012, Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == math.cpp
|
25
|
+
//
|
26
|
+
// Ruby-exposed BLAS and LAPACK functions.
|
27
|
+
//
|
28
|
+
// === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
|
29
|
+
//
|
30
|
+
// This procedure is written as if for a fictional function with double
|
31
|
+
// version dbacon, which we'll say is from LAPACK.
|
32
|
+
//
|
33
|
+
// 1. Write a default templated version which probably returns a boolean.
|
34
|
+
// Call it bacon, and put it in math.h.
|
35
|
+
//
|
36
|
+
// Order will always be row-major, so we don't need to pass that.
|
37
|
+
// CBLAS_TRANSPOSE-type arguments, however, should be passed.
|
38
|
+
//
|
39
|
+
// Otherwise, arguments should look like those in cblas.h or clapack.h:
|
40
|
+
//
|
41
|
+
// template <typename DType>
|
42
|
+
// bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
43
|
+
// rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
|
44
|
+
// }
|
45
|
+
//
|
46
|
+
// 2. In math.cpp, add a templated inline static version of the function which takes
|
47
|
+
// only void* pointers and uses reinterpret_cast to convert them to the
|
48
|
+
// proper dtype.
|
49
|
+
//
|
50
|
+
// This function may also need to switch m and n if these arguments are given.
|
51
|
+
//
|
52
|
+
// For an example, see cblas_gemm. This function should do nothing other than cast
|
53
|
+
// appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
|
54
|
+
// all take void* only, and no other pointers that vary between functions, you can skip
|
55
|
+
// this particular step -- as we can call them directly using a custom function pointer
|
56
|
+
// array (same function signature!).
|
57
|
+
//
|
58
|
+
// This version of the function will be the one exposed through NMatrix::LAPACK. We
|
59
|
+
// want it to be as close to the actual LAPACK version of the function as possible,
|
60
|
+
// and with as few checks as possible.
|
61
|
+
//
|
62
|
+
// You will probably need a forward declaration in the extern "C" block.
|
63
|
+
//
|
64
|
+
// Note: In that case, the function you wrote in Step 1 should also take exactly the
|
65
|
+
// same arguments as clapack_xbacon. Otherwise Bad Things will happen.
|
66
|
+
//
|
67
|
+
// 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
|
68
|
+
//
|
69
|
+
// You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
|
70
|
+
// Or you may prefer to do it by hand:
|
71
|
+
//
|
72
|
+
// template <>
|
73
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
|
74
|
+
// clapack_sbacon(trans, M, N, A, ...);
|
75
|
+
// return true;
|
76
|
+
// }
|
77
|
+
//
|
78
|
+
// Make sure these functions are in the namespace nm::math.
|
79
|
+
//
|
80
|
+
// Note that you should do everything in your power here to parse any return values
|
81
|
+
// clapack_sbacon may give you. We're not trying very hard in this example, but you might
|
82
|
+
// look at getrf to see how it might be done.
|
83
|
+
//
|
84
|
+
// 4. Expose the function in nm_math_init_blas(), in math.cpp:
|
85
|
+
//
|
86
|
+
// rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
|
87
|
+
//
|
88
|
+
// Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
|
89
|
+
//
|
90
|
+
// 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
|
91
|
+
// sanity checks we left out in step 2.
|
92
|
+
//
|
93
|
+
// 6. Write tests for NMatrix::LAPACK::getrf, confirming that it works for the ATLAS dtypes.
|
94
|
+
//
|
95
|
+
// 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
|
96
|
+
// f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
|
97
|
+
// inserts a lot of unnecessary stuff.
|
98
|
+
//
|
99
|
+
// Copy and paste the output into the default templated function you wrote in Step 1.
|
100
|
+
// Fix it so it works as a template instead of just for doubles.
|
101
|
+
//
|
102
|
+
// 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
|
103
|
+
//
|
104
|
+
// 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
|
105
|
+
// or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
|
106
|
+
//
|
107
|
+
// 10. Pull request!
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
/*
|
112
|
+
* Project Includes
|
113
|
+
*/
|
114
|
+
|
115
|
+
#include "math.h"
|
116
|
+
#include "lapack.h"
|
117
|
+
|
118
|
+
#include "nmatrix.h"
|
119
|
+
#include "ruby_constants.h"
|
120
|
+
|
121
|
+
/*
|
122
|
+
* Forward Declarations
|
123
|
+
*/
|
124
|
+
|
125
|
+
extern "C" {
|
126
|
+
#ifdef HAVE_CLAPACK_H
|
127
|
+
#include <clapack.h>
|
128
|
+
#endif
|
129
|
+
|
130
|
+
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
|
131
|
+
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
|
132
|
+
|
133
|
+
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
134
|
+
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
135
|
+
|
136
|
+
static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
137
|
+
VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
138
|
+
|
139
|
+
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
|
140
|
+
|
141
|
+
static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
142
|
+
|
143
|
+
} // end of extern "C" block
|
144
|
+
|
145
|
+
////////////////////
|
146
|
+
// Math Functions //
|
147
|
+
////////////////////
|
148
|
+
|
149
|
+
namespace nm { namespace math {
|
150
|
+
|
151
|
+
/*
|
152
|
+
* Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
|
153
|
+
*/
|
154
|
+
template <typename DType>
|
155
|
+
void det_exact(const int M, const void* A_elements, const int lda, void* result_arg) {
|
156
|
+
DType* result = reinterpret_cast<DType*>(result_arg);
|
157
|
+
const DType* A = reinterpret_cast<const DType*>(A_elements);
|
158
|
+
|
159
|
+
typename LongDType<DType>::type x, y;
|
160
|
+
|
161
|
+
if (M == 2) {
|
162
|
+
*result = A[0] * A[lda+1] - A[1] * A[lda];
|
163
|
+
|
164
|
+
} else if (M == 3) {
|
165
|
+
x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
|
166
|
+
y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
|
167
|
+
x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di)
|
168
|
+
|
169
|
+
y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
|
170
|
+
*result = A[2]*y + x; // c*(dh-eg) + _
|
171
|
+
} else if (M < 2) {
|
172
|
+
rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
|
173
|
+
} else {
|
174
|
+
rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
/*
|
182
|
+
* Function signature conversion for calling CBLAS' gemm functions as directly as possible.
|
183
|
+
*
|
184
|
+
* For documentation: http://www.netlib.org/blas/dgemm.f
|
185
|
+
*/
|
186
|
+
template <typename DType>
|
187
|
+
inline static void cblas_gemm(const enum CBLAS_ORDER order,
|
188
|
+
const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
|
189
|
+
int m, int n, int k,
|
190
|
+
void* alpha,
|
191
|
+
void* a, int lda,
|
192
|
+
void* b, int ldb,
|
193
|
+
void* beta,
|
194
|
+
void* c, int ldc)
|
195
|
+
{
|
196
|
+
gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
|
197
|
+
reinterpret_cast<DType*>(a), lda,
|
198
|
+
reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
|
199
|
+
reinterpret_cast<DType*>(c), ldc);
|
200
|
+
}
|
201
|
+
|
202
|
+
|
203
|
+
/*
|
204
|
+
* Function signature conversion for calling CBLAS's gemv functions as directly as possible.
|
205
|
+
*
|
206
|
+
* For documentation: http://www.netlib.org/lapack/double/dgetrf.f
|
207
|
+
*/
|
208
|
+
template <typename DType>
|
209
|
+
inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans_a,
|
210
|
+
int m, int n,
|
211
|
+
void* alpha,
|
212
|
+
void* a, int lda,
|
213
|
+
void* x, int incx,
|
214
|
+
void* beta,
|
215
|
+
void* y, int incy)
|
216
|
+
{
|
217
|
+
return gemv<DType>(trans_a,
|
218
|
+
m, n, reinterpret_cast<DType*>(alpha),
|
219
|
+
reinterpret_cast<DType*>(a), lda,
|
220
|
+
reinterpret_cast<DType*>(x), incx, reinterpret_cast<DType*>(beta),
|
221
|
+
reinterpret_cast<DType*>(y), incy);
|
222
|
+
}
|
223
|
+
|
224
|
+
|
225
|
+
/*
|
226
|
+
* Function signature conversion for calling CBLAS' trsm functions as directly as possible.
|
227
|
+
*
|
228
|
+
* For documentation: http://www.netlib.org/blas/dtrsm.f
|
229
|
+
*/
|
230
|
+
template <typename DType>
|
231
|
+
inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
232
|
+
const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
|
233
|
+
const int m, const int n, const void* alpha, const void* a,
|
234
|
+
const int lda, void* b, const int ldb)
|
235
|
+
{
|
236
|
+
trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
|
237
|
+
reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
|
238
|
+
}
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
}} // end of namespace nm::math
|
243
|
+
|
244
|
+
|
245
|
+
extern "C" {
|
246
|
+
|
247
|
+
///////////////////
|
248
|
+
// Ruby Bindings //
|
249
|
+
///////////////////
|
250
|
+
|
251
|
+
/*
 * Defines the NMatrix::LAPACK and NMatrix::BLAS modules and attaches the
 * low-level singleton methods implemented in this file. The trailing integer
 * of each registration is the number of Ruby arguments (self excluded).
 */
void nm_math_init_blas() {
  // NMatrix::LAPACK
  cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");

  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal",  (METHOD)nm_clapack_scal,  4);

  // NMatrix::BLAS
  cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");

  rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
  rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
  rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
}
|
263
|
+
|
264
|
+
|
265
|
+
/* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
|
266
|
+
* into an enum recognized by cblas.
|
267
|
+
*
|
268
|
+
* Called by nm_cblas_gemm -- basically inline.
|
269
|
+
*
|
270
|
+
*/
|
271
|
+
static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
|
272
|
+
if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
|
273
|
+
else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
|
274
|
+
else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
|
275
|
+
else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
|
276
|
+
return CblasNoTrans;
|
277
|
+
}
|
278
|
+
|
279
|
+
/*
|
280
|
+
* Interprets cblas argument which could be :left or :right
|
281
|
+
*
|
282
|
+
* Called by nm_cblas_trsm -- basically inline
|
283
|
+
*/
|
284
|
+
static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
|
285
|
+
ID op_id = rb_to_id(op);
|
286
|
+
if (op_id == nm_rb_left) return CblasLeft;
|
287
|
+
if (op_id == nm_rb_right) return CblasRight;
|
288
|
+
rb_raise(rb_eArgError, "Expected :left or :right for side argument");
|
289
|
+
return CblasLeft;
|
290
|
+
}
|
291
|
+
|
292
|
+
/*
|
293
|
+
* Interprets cblas argument which could be :upper or :lower
|
294
|
+
*
|
295
|
+
* Called by nm_cblas_trsm -- basically inline
|
296
|
+
*/
|
297
|
+
static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
|
298
|
+
ID op_id = rb_to_id(op);
|
299
|
+
if (op_id == nm_rb_upper) return CblasUpper;
|
300
|
+
if (op_id == nm_rb_lower) return CblasLower;
|
301
|
+
rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
|
302
|
+
return CblasUpper;
|
303
|
+
}
|
304
|
+
|
305
|
+
|
306
|
+
/*
|
307
|
+
* Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
|
308
|
+
*
|
309
|
+
* Called by nm_cblas_trsm -- basically inline
|
310
|
+
*/
|
311
|
+
static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
|
312
|
+
if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
|
313
|
+
return CblasNonUnit;
|
314
|
+
}
|
315
|
+
|
316
|
+
/*
|
317
|
+
* Interprets cblas argument which could be :row or :col
|
318
|
+
*/
|
319
|
+
static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
320
|
+
if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
|
321
|
+
else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
|
322
|
+
rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
|
323
|
+
rb_raise(rb_eArgError, "Expected :row or :col for order argument");
|
324
|
+
return CblasRowMajor;
|
325
|
+
}
|
326
|
+
|
327
|
+
|
328
|
+
/* Call any of the cblas_xgemm functions as directly as possible.
|
329
|
+
*
|
330
|
+
* The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
|
331
|
+
*
|
332
|
+
* C = alpha*op(A)*op(B) + beta*C
|
333
|
+
*
|
334
|
+
* where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
|
335
|
+
*
|
336
|
+
* Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
|
337
|
+
* Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
|
338
|
+
* expose the ultra-optimized ATLAS versions.
|
339
|
+
*
|
340
|
+
* == Arguments
|
341
|
+
* See: http://www.netlib.org/blas/dgemm.f
|
342
|
+
*
|
343
|
+
* You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible
|
344
|
+
* with its arguments?
|
345
|
+
*
|
346
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
347
|
+
* handling, so you can easily crash Ruby!
|
348
|
+
*/
|
349
|
+
static VALUE nm_cblas_gemm(VALUE self,
|
350
|
+
VALUE order,
|
351
|
+
VALUE trans_a, VALUE trans_b,
|
352
|
+
VALUE m, VALUE n, VALUE k,
|
353
|
+
VALUE alpha,
|
354
|
+
VALUE a, VALUE lda,
|
355
|
+
VALUE b, VALUE ldb,
|
356
|
+
VALUE beta,
|
357
|
+
VALUE c, VALUE ldc)
|
358
|
+
{
|
359
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
|
360
|
+
|
361
|
+
dtype_t dtype = NM_DTYPE(a);
|
362
|
+
|
363
|
+
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
364
|
+
*pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
365
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
366
|
+
rubyval_to_cval(beta, dtype, pBeta);
|
367
|
+
|
368
|
+
ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
369
|
+
|
370
|
+
return c;
|
371
|
+
}
|
372
|
+
|
373
|
+
|
374
|
+
/* Call any of the cblas_xgemv functions as directly as possible.
|
375
|
+
*
|
376
|
+
* The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
|
377
|
+
*
|
378
|
+
* y = alpha*op(A)*x + beta*y
|
379
|
+
*
|
380
|
+
* where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
|
381
|
+
*
|
382
|
+
* Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
|
383
|
+
* Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
|
384
|
+
* expose the ultra-optimized ATLAS versions.
|
385
|
+
*
|
386
|
+
* == Arguments
|
387
|
+
* See: http://www.netlib.org/blas/dgemm.f
|
388
|
+
*
|
389
|
+
* You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
|
390
|
+
* with its arguments?
|
391
|
+
*
|
392
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
393
|
+
* handling, so you can easily crash Ruby!
|
394
|
+
*/
|
395
|
+
static VALUE nm_cblas_gemv(VALUE self,
|
396
|
+
VALUE trans_a,
|
397
|
+
VALUE m, VALUE n,
|
398
|
+
VALUE alpha,
|
399
|
+
VALUE a, VALUE lda,
|
400
|
+
VALUE x, VALUE incx,
|
401
|
+
VALUE beta,
|
402
|
+
VALUE y, VALUE incy)
|
403
|
+
{
|
404
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE trans_a, int m, int n, void* alpha, void* a, int lda, void* x, int incx, void* beta, void* y, int incy);
|
405
|
+
|
406
|
+
dtype_t dtype = NM_DTYPE(a);
|
407
|
+
|
408
|
+
void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
409
|
+
*pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
410
|
+
rubyval_to_cval(alpha, dtype, pAlpha);
|
411
|
+
rubyval_to_cval(beta, dtype, pBeta);
|
412
|
+
|
413
|
+
return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
|
414
|
+
}
|
415
|
+
|
416
|
+
|
417
|
+
/* Low-level Ruby binding for the cblas_xtrsm family (triangular solve with multiple right-hand sides).
 *
 * The implementation is selected by the dtype of +a+. Complex dtypes call cblas_ctrsm/cblas_ztrsm
 * directly; the remaining non-integer dtypes go through nm::math::cblas_trsm<DType>. +alpha+ is
 * converted to the dtype of +a+ before the call, and +b+ is modified in place.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dtrsm.f
 *
 * Raises NotImplementedError for integer dtypes, which have no entry in the dispatch table
 * (previously this case called through a NULL function pointer and crashed the interpreter).
 *
 * Like the other bindings in this file, this function does almost no type checking.
 *
 * Returns true.
 */
static VALUE nm_cblas_trsm(VALUE self,
                           VALUE order,
                           VALUE side, VALUE uplo,
                           VALUE trans_a, VALUE diag,
                           VALUE m, VALUE n,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE b, VALUE ldb)
{
  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
                                        const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
                                        const int, const int, const void* alpha, const void* a,
                                        const int lda, void* b, const int ldb) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::cblas_trsm<float>,
      nm::math::cblas_trsm<double>,
      cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
      nm::math::cblas_trsm<nm::Rational32>,
      nm::math::cblas_trsm<nm::Rational64>,
      nm::math::cblas_trsm<nm::Rational128>,
      nm::math::cblas_trsm<nm::RubyObject>
  };

  dtype_t dtype = NM_DTYPE(a);

  // Integer dtypes have no implementation; fail cleanly instead of calling NULL.
  if (ttable[dtype] == NULL) {
    rb_raise(rb_eNotImpError, "trsm is not implemented for integer dtypes");
  }

  // Stack scratch space holding alpha converted to the matrix dtype.
  void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);

  ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo),
                blas_transpose_sym(trans_a), blas_diag_sym(diag),
                FIX2INT(m), FIX2INT(n), pAlpha,
                NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
                NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));

  return Qtrue;
}
|
449
|
+
|
450
|
+
|
451
|
+
/*
|
452
|
+
* Based on LAPACK's dscal function, but for any dtype.
|
453
|
+
*
|
454
|
+
* In-place modification; returns the modified vector as well.
|
455
|
+
*/
|
456
|
+
static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
|
457
|
+
dtype_t dtype = NM_DTYPE(vector);
|
458
|
+
|
459
|
+
void* da = ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
460
|
+
rubyval_to_cval(scale, dtype, da);
|
461
|
+
|
462
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
|
463
|
+
|
464
|
+
ttable[dtype](FIX2INT(n), da, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));
|
465
|
+
|
466
|
+
return vector;
|
467
|
+
}
|
468
|
+
|
469
|
+
|
470
|
+
/* Call any of the clpack_xgetrf functions as directly as possible.
|
471
|
+
*
|
472
|
+
* The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
|
473
|
+
* matrix A using partial pivoting with row interchanges.
|
474
|
+
*
|
475
|
+
* The factorization has the form:
|
476
|
+
* A = P * L * U
|
477
|
+
* where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
|
478
|
+
* and U is upper triangular (upper trapezoidal if m < n).
|
479
|
+
*
|
480
|
+
* This is the right-looking level 3 BLAS version of the algorithm.
|
481
|
+
*
|
482
|
+
* == Arguments
|
483
|
+
* See: http://www.netlib.org/lapack/double/dgetrf.f
|
484
|
+
* (You don't need argument 5; this is the value returned by this function.)
|
485
|
+
*
|
486
|
+
* You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
|
487
|
+
* with its arguments?
|
488
|
+
*
|
489
|
+
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
490
|
+
* handling, so you can easily crash Ruby!
|
491
|
+
*
|
492
|
+
* Returns an array giving the pivot indices (normally these are argument #5).
|
493
|
+
*/
|
494
|
+
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
|
495
|
+
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
|
496
|
+
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
497
|
+
nm::math::clapack_getrf<float>,
|
498
|
+
nm::math::clapack_getrf<double>,
|
499
|
+
#ifdef HAVE_CLAPACK_H
|
500
|
+
clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
|
501
|
+
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
502
|
+
nm::math::clapack_getrf<nm::Complex64>,
|
503
|
+
nm::math::clapack_getrf<nm::Complex128>,
|
504
|
+
#endif
|
505
|
+
nm::math::clapack_getrf<nm::Rational32>,
|
506
|
+
nm::math::clapack_getrf<nm::Rational64>,
|
507
|
+
nm::math::clapack_getrf<nm::Rational128>,
|
508
|
+
nm::math::clapack_getrf<nm::RubyObject>
|
509
|
+
};
|
510
|
+
|
511
|
+
int M = FIX2INT(m),
|
512
|
+
N = FIX2INT(n);
|
513
|
+
|
514
|
+
// Allocate the pivot index array, which is of size MIN(M, N).
|
515
|
+
size_t ipiv_size = std::min(M,N);
|
516
|
+
int* ipiv = ALLOCA_N(int, ipiv_size);
|
517
|
+
|
518
|
+
// Call either our version of getrf or the LAPACK version.
|
519
|
+
ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
|
520
|
+
|
521
|
+
// Result will be stored in a. We return ipiv as an array.
|
522
|
+
VALUE ipiv_array = rb_ary_new2(ipiv_size);
|
523
|
+
for (size_t i = 0; i < ipiv_size; ++i) {
|
524
|
+
rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
|
525
|
+
}
|
526
|
+
|
527
|
+
return ipiv_array;
|
528
|
+
}
|
529
|
+
|
530
|
+
|
531
|
+
/*
|
532
|
+
* C accessor for calculating an exact determinant.
|
533
|
+
*/
|
534
|
+
void nm_math_det_exact(const int M, const void* elements, const int lda, dtype_t dtype, void* result) {
|
535
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
|
536
|
+
|
537
|
+
ttable[dtype](M, elements, lda, result);
|
538
|
+
}
|
539
|
+
|
540
|
+
|
541
|
+
/*
 * Transposes a row-major dense matrix of opaque, fixed-size elements.
 *
 * A is read as M rows by N columns with row stride lda; B is written as N rows
 * by M columns with row stride ldb (both strides counted in elements). Each
 * element is element_size bytes and is copied with memcpy, so no element type
 * information is required. Nothing is allocated; B must already be large enough.
 */
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
  const char* src = reinterpret_cast<const char*>(A);
  char*       dst = reinterpret_cast<char*>(B);

  for (size_t out_row = 0; out_row < N; ++out_row) {
    for (size_t out_col = 0; out_col < M; ++out_col) {
      // B(out_row, out_col) = A(out_col, out_row)
      const size_t dst_offset = (out_row*ldb+out_col)*element_size;
      const size_t src_offset = (out_col*lda+out_row)*element_size;

      memcpy(dst + dst_offset, src + src_offset, element_size);
    }
  }
}
|
555
|
+
|
556
|
+
|
557
|
+
} // end of extern "C" block
|