nmatrix 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +20 -55
- data/ext/nmatrix/data/data.cpp +11 -44
- data/ext/nmatrix/data/data.h +174 -311
- data/ext/nmatrix/data/meta.h +1 -7
- data/ext/nmatrix/data/ruby_object.h +3 -85
- data/ext/nmatrix/extconf.rb +2 -73
- data/ext/nmatrix/math.cpp +170 -813
- data/ext/nmatrix/math/asum.h +2 -25
- data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +2 -32
- data/ext/nmatrix/math/gemv.h +1 -35
- data/ext/nmatrix/math/getrf.h +21 -6
- data/ext/nmatrix/math/getrs.h +0 -8
- data/ext/nmatrix/math/imax.h +0 -22
- data/ext/nmatrix/math/long_dtype.h +0 -3
- data/ext/nmatrix/math/math.h +11 -337
- data/ext/nmatrix/math/nrm2.h +2 -23
- data/ext/nmatrix/math/rot.h +1 -25
- data/ext/nmatrix/math/rotg.h +4 -13
- data/ext/nmatrix/math/scal.h +0 -22
- data/ext/nmatrix/math/trsm.h +0 -55
- data/ext/nmatrix/math/util.h +148 -0
- data/ext/nmatrix/nmatrix.cpp +0 -14
- data/ext/nmatrix/nmatrix.h +92 -84
- data/ext/nmatrix/ruby_constants.cpp +0 -2
- data/ext/nmatrix/ruby_constants.h +0 -2
- data/ext/nmatrix/ruby_nmatrix.c +86 -45
- data/ext/nmatrix/storage/dense/dense.cpp +1 -7
- data/ext/nmatrix/storage/storage.h +0 -1
- data/ext/nmatrix/ttable_helper.rb +0 -6
- data/ext/nmatrix/util/io.cpp +1 -1
- data/lib/nmatrix.rb +1 -19
- data/lib/nmatrix/blas.rb +33 -11
- data/lib/nmatrix/io/market.rb +3 -3
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +382 -131
- data/lib/nmatrix/monkeys.rb +2 -3
- data/lib/nmatrix/nmatrix.rb +166 -13
- data/lib/nmatrix/shortcuts.rb +72 -7
- data/lib/nmatrix/version.rb +2 -2
- data/spec/00_nmatrix_spec.rb +154 -5
- data/spec/02_slice_spec.rb +2 -6
- data/spec/03_nmatrix_monkeys_spec.rb +7 -1
- data/spec/blas_spec.rb +60 -33
- data/spec/homogeneous_spec.rb +10 -10
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/math_spec.rb +436 -52
- data/spec/shortcuts_spec.rb +28 -4
- data/spec/spec_helper.rb +14 -2
- data/spec/utm5940.mtx +83844 -0
- metadata +49 -76
- data/.gitignore +0 -27
- data/.rspec +0 -2
- data/.travis.yml +0 -15
- data/CONTRIBUTING.md +0 -82
- data/Gemfile +0 -2
- data/History.txt +0 -677
- data/LICENSE.txt +0 -23
- data/Manifest.txt +0 -92
- data/README.rdoc +0 -150
- data/Rakefile +0 -216
- data/ext/nmatrix/data/rational.h +0 -440
- data/ext/nmatrix/math/geev.h +0 -82
- data/ext/nmatrix/math/ger.h +0 -96
- data/ext/nmatrix/math/gesdd.h +0 -80
- data/ext/nmatrix/math/gesvd.h +0 -78
- data/ext/nmatrix/math/getf2.h +0 -86
- data/ext/nmatrix/math/getri.h +0 -108
- data/ext/nmatrix/math/potrs.h +0 -129
- data/ext/nmatrix/math/swap.h +0 -52
- data/lib/nmatrix/lapack.rb +0 -240
- data/nmatrix.gemspec +0 -55
- data/scripts/mac-brew-gcc.sh +0 -50
- data/scripts/mac-mavericks-brew-gcc.sh +0 -22
- data/spec/lapack_spec.rb +0 -459
data/ext/nmatrix/data/meta.h
CHANGED
@@ -44,9 +44,6 @@ namespace nm {
|
|
44
44
|
template <> struct ctype_to_dtype_enum<double> { static const nm::dtype_t value_type = nm::FLOAT64; };
|
45
45
|
template <> struct ctype_to_dtype_enum<Complex64> { static const nm::dtype_t value_type = nm::COMPLEX64; };
|
46
46
|
template <> struct ctype_to_dtype_enum<Complex128> { static const nm::dtype_t value_type = nm::COMPLEX128; };
|
47
|
-
template <> struct ctype_to_dtype_enum<Rational32> { static const nm::dtype_t value_type = nm::RATIONAL32; };
|
48
|
-
template <> struct ctype_to_dtype_enum<Rational64> { static const nm::dtype_t value_type = nm::RATIONAL64; };
|
49
|
-
template <> struct ctype_to_dtype_enum<Rational128> { static const nm::dtype_t value_type = nm::RATIONAL128; };
|
50
47
|
template <> struct ctype_to_dtype_enum<RubyObject> { static const nm::dtype_t value_type = nm::RUBYOBJ; };
|
51
48
|
|
52
49
|
|
@@ -60,11 +57,8 @@ namespace nm {
|
|
60
57
|
template <> struct dtype_enum_T<nm::FLOAT64> { typedef double type; };
|
61
58
|
template <> struct dtype_enum_T<nm::COMPLEX64> { typedef nm::Complex64 type; };
|
62
59
|
template <> struct dtype_enum_T<nm::COMPLEX128> { typedef nm::Complex128 type; };
|
63
|
-
template <> struct dtype_enum_T<nm::RATIONAL32> { typedef nm::Rational32 type; };
|
64
|
-
template <> struct dtype_enum_T<nm::RATIONAL64> { typedef nm::Rational64 type; };
|
65
|
-
template <> struct dtype_enum_T<nm::RATIONAL128> { typedef nm::Rational128 type; };
|
66
60
|
template <> struct dtype_enum_T<nm::RUBYOBJ> { typedef nm::RubyObject type; };
|
67
61
|
|
68
62
|
} // end namespace nm
|
69
63
|
|
70
|
-
#endif
|
64
|
+
#endif
|
@@ -45,7 +45,7 @@
|
|
45
45
|
/*
|
46
46
|
* Macros
|
47
47
|
*/
|
48
|
-
#define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX)
|
48
|
+
#define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX))
|
49
49
|
#define NMATRIX_CHECK_TYPE(val) \
|
50
50
|
if (TYPE(val) != T_DATA || (RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) \
|
51
51
|
rb_raise(rb_eTypeError, "Expected NMatrix on left-hand side of operation.");
|
@@ -76,12 +76,6 @@ class RubyObject {
|
|
76
76
|
template <typename FloatType, typename = typename std::enable_if<std::is_floating_point<FloatType>::value>::type>
|
77
77
|
inline RubyObject(const Complex<FloatType>& other) : rval(rb_complex_new(rb_float_new(other.r), rb_float_new(other.i))) {}
|
78
78
|
|
79
|
-
/*
|
80
|
-
* Rational number constructor.
|
81
|
-
*/
|
82
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
83
|
-
inline RubyObject(const Rational<IntType>& other) : rval(rb_rational_new(INT2FIX(other.n), INT2FIX(other.d))) {}
|
84
|
-
|
85
79
|
/*
|
86
80
|
* Integer constructor.
|
87
81
|
*
|
@@ -123,10 +117,6 @@ class RubyObject {
|
|
123
117
|
inline operator double() const { RETURN_OBJ2NUM(NUM2DBL) }
|
124
118
|
inline operator float() const { RETURN_OBJ2NUM(NUM2DBL) }
|
125
119
|
|
126
|
-
inline operator Rational32() const { return this->to<Rational32>(); }
|
127
|
-
inline operator Rational64() const { return this->to<Rational64>(); }
|
128
|
-
inline operator Rational128() const { return this->to<Rational128>(); }
|
129
|
-
|
130
120
|
inline operator Complex64() const { return this->to<Complex64>(); }
|
131
121
|
inline operator Complex128() const { return this->to<Complex128>(); }
|
132
122
|
/*
|
@@ -255,20 +245,6 @@ class RubyObject {
|
|
255
245
|
return *this != RubyObject(other);
|
256
246
|
}
|
257
247
|
*/
|
258
|
-
//////////////////////////////
|
259
|
-
// RUBY-RATIONAL OPERATIONS //
|
260
|
-
//////////////////////////////
|
261
|
-
|
262
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
263
|
-
inline bool operator==(const Rational<IntType>& other) const {
|
264
|
-
return *this == RubyObject(other);
|
265
|
-
}
|
266
|
-
|
267
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
268
|
-
inline bool operator!=(const Rational<IntType>& other) const {
|
269
|
-
return *this != RubyObject(other);
|
270
|
-
}
|
271
|
-
|
272
248
|
//////////////////////////////
|
273
249
|
// RUBY-COMPLEX OPERATIONS //
|
274
250
|
//////////////////////////////
|
@@ -304,7 +280,7 @@ class RubyObject {
|
|
304
280
|
*/
|
305
281
|
template <typename ComplexType>
|
306
282
|
inline typename std::enable_if<made_from_same_template<ComplexType, Complex64>::value, ComplexType>::type to(void) const {
|
307
|
-
if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT
|
283
|
+
if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT) {
|
308
284
|
return ComplexType(NUM2DBL(this->rval));
|
309
285
|
|
310
286
|
} else if (TYPE(this->rval) == T_COMPLEX) {
|
@@ -314,25 +290,8 @@ class RubyObject {
|
|
314
290
|
rb_raise(rb_eTypeError, "Invalid conversion to Complex type.");
|
315
291
|
}
|
316
292
|
}
|
317
|
-
|
318
|
-
/*
|
319
|
-
* Convert a Ruby object to a rational number.
|
320
|
-
*/
|
321
|
-
template <typename RationalType>
|
322
|
-
inline typename std::enable_if<made_from_same_template<RationalType, Rational32>::value, RationalType>::type to(void) const {
|
323
|
-
if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT or TYPE(this->rval) == T_COMPLEX) {
|
324
|
-
return RationalType(NUM2INT(this->rval));
|
325
|
-
|
326
|
-
} else if (TYPE(this->rval) == T_RATIONAL) {
|
327
|
-
return RationalType(NUM2INT(rb_funcall(this->rval, nm_rb_numer, 0)), NUM2INT(rb_funcall(this->rval, nm_rb_denom, 0)));
|
328
|
-
|
329
|
-
} else {
|
330
|
-
rb_raise(rb_eTypeError, "Invalid conversion to Rational type.");
|
331
|
-
}
|
332
|
-
}
|
333
|
-
|
334
293
|
};
|
335
|
-
|
294
|
+
|
336
295
|
// Negative operator
|
337
296
|
inline RubyObject operator-(const RubyObject& rhs) {
|
338
297
|
return RubyObject(rb_funcall(rhs.rval, nm_rb_negate, 0));
|
@@ -413,47 +372,6 @@ inline bool operator>(const Complex<FloatType>& left, const RubyObject& right) {
|
|
413
372
|
return RubyObject(left) > right;
|
414
373
|
}
|
415
374
|
|
416
|
-
|
417
|
-
|
418
|
-
//////////////////////////////
|
419
|
-
// RATIONAL-RUBY OPERATIONS //
|
420
|
-
//////////////////////////////
|
421
|
-
|
422
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
423
|
-
inline bool operator==(const Rational<IntType>& left, const RubyObject& right) {
|
424
|
-
return RubyObject(left) == right;
|
425
|
-
}
|
426
|
-
|
427
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
428
|
-
inline bool operator!=(const Rational<IntType>& left, const RubyObject& right) {
|
429
|
-
return RubyObject(left) != right;
|
430
|
-
}
|
431
|
-
|
432
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
433
|
-
inline bool operator>=(const Rational<IntType>& left, const RubyObject& right) {
|
434
|
-
return RubyObject(left) >= right;
|
435
|
-
}
|
436
|
-
|
437
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
438
|
-
inline bool operator<=(const Rational<IntType>& left, const RubyObject& right) {
|
439
|
-
return RubyObject(left) <= right;
|
440
|
-
}
|
441
|
-
|
442
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
443
|
-
inline bool operator<(const Rational<IntType>& left, const RubyObject& right) {
|
444
|
-
return RubyObject(left) < right;
|
445
|
-
}
|
446
|
-
|
447
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
448
|
-
inline bool operator>(const Rational<IntType>& left, const RubyObject& right) {
|
449
|
-
return RubyObject(left) > right;
|
450
|
-
}
|
451
|
-
|
452
|
-
inline std::ostream& operator<<(std::ostream& out, const RubyObject& rhs) {
|
453
|
-
out << "RUBYOBJECT" << std::flush; // FIXME: Try calling inspect or something on the Ruby object if we really need to debug it.
|
454
|
-
return out;
|
455
|
-
}
|
456
|
-
|
457
375
|
} // end of namespace nm
|
458
376
|
|
459
377
|
namespace std {
|
data/ext/nmatrix/extconf.rb
CHANGED
@@ -155,83 +155,12 @@ else
|
|
155
155
|
puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
|
156
156
|
end
|
157
157
|
|
158
|
-
# add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
|
159
|
-
|
160
|
-
# The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
|
161
|
-
# --with-atlas-lib or --with-atlas-include and tell the compiler where to look
|
162
|
-
# for ATLAS. The same for all the others
|
163
|
-
#
|
164
|
-
#dir_config("clapack", ["/usr/local/atlas/include"], [])
|
165
|
-
#
|
166
|
-
#
|
167
|
-
|
168
|
-
# Is g++ having trouble finding your header files?
|
169
|
-
# Try this:
|
170
|
-
# export C_INCLUDE_PATH=/usr/local/atlas/include
|
171
|
-
# export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
|
172
|
-
# (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
|
173
|
-
|
174
|
-
idefaults = {lapack: ["/usr/include/atlas"],
|
175
|
-
cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
|
176
|
-
atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
|
177
|
-
|
178
|
-
# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
|
179
|
-
# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
|
180
|
-
# that Dir.exists?(d) for each.
|
181
|
-
ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
|
182
|
-
cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
|
183
|
-
atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
|
184
|
-
|
185
|
-
if have_library("clapack") # Usually only applies for Mac OS X
|
186
|
-
$libs += " -lclapack "
|
187
|
-
end
|
188
|
-
|
189
|
-
unless have_library("lapack")
|
190
|
-
dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
|
191
|
-
end
|
192
|
-
|
193
|
-
unless have_library("cblas")
|
194
|
-
dir_config("cblas", idefaults[:cblas], ldefaults[:cblas])
|
195
|
-
end
|
196
|
-
|
197
|
-
unless have_library("atlas")
|
198
|
-
dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
|
199
|
-
end
|
200
|
-
|
201
|
-
# If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
|
202
|
-
# we try our luck with the default location.
|
203
|
-
if have_header("atlas/cblas.h")
|
204
|
-
have_header("atlas/clapack.h")
|
205
|
-
else
|
206
|
-
have_header("cblas.h")
|
207
|
-
have_header("clapack.h")
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
# Although have_func is supposed to take a list as its second argument, I find that it simply
|
212
|
-
# applies a :to_s to the second arg and doesn't actually check each one. We may want to put
|
213
|
-
# have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and
|
214
|
-
# lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. --@mohawkjohn 8/20/14
|
215
|
-
have_func("clapack_dgetrf", "atlas/clapack.h")
|
216
|
-
have_func("clapack_dgetri", "atlas/clapack.h")
|
217
|
-
have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
|
218
|
-
|
219
|
-
have_func("cblas_dgemm", "cblas.h")
|
220
|
-
|
221
|
-
#have_func("rb_scan_args", "ruby.h")
|
222
|
-
|
223
|
-
#find_library("lapack", "clapack_dgetrf")
|
224
|
-
#find_library("cblas", "cblas_dgemm")
|
225
|
-
#find_library("atlas", "ATL_dgemmNN")
|
226
|
-
# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
|
227
|
-
$libs += " -llapack -lcblas -latlas "
|
228
158
|
#$libs += " -lprofiler "
|
229
159
|
|
230
|
-
|
231
160
|
# For release, these next two should both be changed to -O3.
|
232
|
-
$CFLAGS += " -O3
|
161
|
+
$CFLAGS += " -O3 "
|
233
162
|
#$CFLAGS += " -static -O0 -g "
|
234
|
-
$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD}
|
163
|
+
$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
|
235
164
|
#$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
|
236
165
|
|
237
166
|
CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
|
data/ext/nmatrix/math.cpp
CHANGED
@@ -23,90 +23,103 @@
|
|
23
23
|
//
|
24
24
|
// == math.cpp
|
25
25
|
//
|
26
|
-
// Ruby-exposed
|
26
|
+
// Ruby-exposed CBLAS and LAPACK functions that are available without
|
27
|
+
// an external library.
|
27
28
|
//
|
28
|
-
// === Procedure for adding
|
29
|
+
// === Procedure for adding CBLAS functions to math.cpp/math.h:
|
29
30
|
//
|
30
31
|
// This procedure is written as if for a fictional function with double
|
31
|
-
// version dbacon, which we'll say is from
|
32
|
+
// version dbacon, which we'll say is from CBLAS.
|
32
33
|
//
|
33
34
|
// 1. Write a default templated version which probably returns a boolean.
|
34
35
|
// Call it bacon, and put it in math.h.
|
35
36
|
//
|
36
|
-
// Order will always be row-major, so we don't need to pass that.
|
37
|
-
// CBLAS_TRANSPOSE-type arguments, however, should be passed.
|
38
|
-
//
|
39
|
-
// Otherwise, arguments should look like those in cblas.h or clapack.h:
|
40
|
-
//
|
41
37
|
// template <typename DType>
|
42
38
|
// bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
43
39
|
// rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
|
44
40
|
// }
|
45
41
|
//
|
42
|
+
// Make sure this is in namespace nm::math
|
43
|
+
//
|
46
44
|
// 2. In math.cpp, add a templated inline static version of the function which takes
|
47
|
-
// only void* pointers and uses
|
48
|
-
// proper dtype.
|
45
|
+
// only void* pointers and uses static_cast to convert them to the
|
46
|
+
// proper dtype. This should also be in namespace nm::math
|
49
47
|
//
|
50
48
|
// This function may also need to switch m and n if these arguments are given.
|
51
49
|
//
|
52
50
|
// For an example, see cblas_gemm. This function should do nothing other than cast
|
53
|
-
// appropriately. If
|
51
|
+
// appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
|
54
52
|
// all take void* only, and no other pointers that vary between functions, you can skip
|
55
53
|
// this particular step -- as we can call them directly using a custom function pointer
|
56
54
|
// array (same function signature!).
|
57
55
|
//
|
58
|
-
// This version of the function will be the one exposed through NMatrix::
|
59
|
-
// want it to be as close to the actual
|
56
|
+
// This version of the function will be the one exposed through NMatrix::BLAS. We
|
57
|
+
// want it to be as close to the actual BLAS version of the function as possible,
|
60
58
|
// and with as few checks as possible.
|
61
59
|
//
|
62
60
|
// You will probably need a forward declaration in the extern "C" block.
|
63
61
|
//
|
64
62
|
// Note: In that case, the function you wrote in Step 1 should also take exactly the
|
65
|
-
// same arguments as
|
63
|
+
// same arguments as cblas_xbacon. Otherwise Bad Things will happen.
|
66
64
|
//
|
67
|
-
// 3. In
|
65
|
+
// 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
|
66
|
+
// call nm::math::bacon()) and also
|
67
|
+
// inline specialized versions of bacon for the different BLAS types.
|
68
|
+
// This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
|
69
|
+
// of bacon from whatever external library is available, as well as the internal version
|
70
|
+
// if an external version is not available. These functions will end up in a namsespace
|
71
|
+
// like nm::math::atlas, but don't explicitly put them in a namespace, they will get
|
72
|
+
// put in the appropriate namespace when cblas_templates_core.h is included.
|
68
73
|
//
|
69
|
-
//
|
70
|
-
//
|
74
|
+
// template <typename DType>
|
75
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
76
|
+
// nm::math::bacon(trans, M, N, A, ...);
|
77
|
+
// }
|
71
78
|
//
|
72
79
|
// template <>
|
73
80
|
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
|
74
|
-
//
|
81
|
+
// cblas_sbacon(trans, M, N, A, ...);
|
75
82
|
// return true;
|
76
83
|
// }
|
77
84
|
//
|
78
|
-
// Make sure these functions are in the namespace nm::math.
|
79
|
-
//
|
80
85
|
// Note that you should do everything in your power here to parse any return values
|
81
|
-
//
|
86
|
+
// cblas_sbacon may give you. We're not trying very hard in this example, but you might
|
82
87
|
// look at getrf to see how it might be done.
|
83
88
|
//
|
84
|
-
// 4.
|
89
|
+
// 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
|
90
|
+
// of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
|
91
|
+
// and math_lapacke.cpp
|
85
92
|
//
|
86
|
-
//
|
93
|
+
// 5. Expose the function in nm_math_init_blas(), in math.cpp:
|
87
94
|
//
|
88
|
-
//
|
95
|
+
// rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
|
89
96
|
//
|
90
|
-
//
|
97
|
+
// Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
|
98
|
+
// to the plugin gems.
|
99
|
+
//
|
100
|
+
// Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
|
101
|
+
//
|
102
|
+
// 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
|
91
103
|
// sanity checks we left out in step 2.
|
92
104
|
//
|
93
|
-
//
|
105
|
+
// 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
|
94
106
|
//
|
95
|
-
//
|
107
|
+
// 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
|
96
108
|
// f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
|
97
109
|
// inserts a lot of unnecessary stuff.
|
98
110
|
//
|
99
111
|
// Copy and paste the output into the default templated function you wrote in Step 1.
|
100
112
|
// Fix it so it works as a template instead of just for doubles.
|
101
113
|
//
|
102
|
-
//
|
114
|
+
// Because of step 3, this will automatically also work for the nmatrix-atlas
|
115
|
+
// and nmatrix-lapacke implementations.
|
116
|
+
//
|
117
|
+
// 9. Write tests to confirm that it works for all data types.
|
103
118
|
//
|
104
|
-
//
|
119
|
+
// 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
|
105
120
|
// or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
|
106
121
|
//
|
107
|
-
//
|
108
|
-
|
109
|
-
|
122
|
+
// 11. Pull request!
|
110
123
|
|
111
124
|
/*
|
112
125
|
* Project Includes
|
@@ -117,30 +130,23 @@
|
|
117
130
|
#include <limits>
|
118
131
|
#include <cmath>
|
119
132
|
|
120
|
-
#include "math/
|
133
|
+
#include "math/cblas_enums.h"
|
134
|
+
|
121
135
|
#include "data/data.h"
|
122
|
-
#include "math/gesdd.h"
|
123
|
-
#include "math/gesvd.h"
|
124
|
-
#include "math/geev.h"
|
125
|
-
#include "math/swap.h"
|
126
136
|
#include "math/imax.h"
|
127
137
|
#include "math/scal.h"
|
128
|
-
#include "math/ger.h"
|
129
|
-
#include "math/getf2.h"
|
130
138
|
#include "math/laswp.h"
|
131
139
|
#include "math/trsm.h"
|
132
|
-
#include "math/long_dtype.h" // for gemm.h
|
133
140
|
#include "math/gemm.h"
|
134
141
|
#include "math/gemv.h"
|
135
142
|
#include "math/asum.h"
|
136
143
|
#include "math/nrm2.h"
|
137
144
|
#include "math/getrf.h"
|
138
|
-
#include "math/getri.h"
|
139
145
|
#include "math/getrs.h"
|
140
|
-
#include "math/potrs.h"
|
141
146
|
#include "math/rot.h"
|
142
147
|
#include "math/rotg.h"
|
143
148
|
#include "math/math.h"
|
149
|
+
#include "math/util.h"
|
144
150
|
#include "storage/dense/dense.h"
|
145
151
|
|
146
152
|
#include "nmatrix.h"
|
@@ -151,12 +157,6 @@
|
|
151
157
|
*/
|
152
158
|
|
153
159
|
extern "C" {
|
154
|
-
#if defined HAVE_CLAPACK_H
|
155
|
-
#include <clapack.h>
|
156
|
-
#elif defined HAVE_ATLAS_CLAPACK_H
|
157
|
-
#include <atlas/clapack.h>
|
158
|
-
#endif
|
159
|
-
|
160
160
|
/* BLAS Level 1. */
|
161
161
|
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
162
162
|
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
|
@@ -166,34 +166,20 @@ extern "C" {
|
|
166
166
|
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
|
167
167
|
|
168
168
|
/* BLAS Level 2. */
|
169
|
+
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
170
|
+
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
171
|
+
|
169
172
|
/* BLAS Level 3. */
|
170
173
|
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
|
171
174
|
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
|
172
|
-
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
173
|
-
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
174
175
|
static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
175
176
|
VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
176
|
-
static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
177
|
-
VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
178
|
-
static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
|
179
|
-
VALUE lda, VALUE beta, VALUE c, VALUE ldc);
|
180
|
-
static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
|
181
|
-
VALUE lda, VALUE beta, VALUE c, VALUE ldc);
|
182
177
|
|
183
178
|
/* LAPACK. */
|
184
179
|
static VALUE nm_has_clapack(VALUE self);
|
185
180
|
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
|
186
|
-
static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
187
181
|
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
|
188
|
-
static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
189
|
-
static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
|
190
|
-
static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
191
182
|
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
|
192
|
-
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
193
|
-
|
194
|
-
static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
|
195
|
-
static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
|
196
|
-
static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
|
197
183
|
} // end of extern "C" block
|
198
184
|
|
199
185
|
////////////////////
|
@@ -230,6 +216,11 @@ namespace nm {
|
|
230
216
|
}
|
231
217
|
}
|
232
218
|
|
219
|
+
//we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive
|
220
|
+
template <>
|
221
|
+
void det_exact<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
|
222
|
+
rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
|
223
|
+
}
|
233
224
|
|
234
225
|
/*
|
235
226
|
* Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
|
@@ -308,6 +299,80 @@ namespace nm {
|
|
308
299
|
delete[] col_index;
|
309
300
|
}
|
310
301
|
|
302
|
+
/*
|
303
|
+
* Reduce a square matrix to hessenberg form with householder transforms
|
304
|
+
*
|
305
|
+
* == Arguments
|
306
|
+
*
|
307
|
+
* nrows - The number of rows present in matrix a.
|
308
|
+
* a_elements - Elements of the matrix to be reduced in 1D array form.
|
309
|
+
*
|
310
|
+
* == References
|
311
|
+
*
|
312
|
+
* http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
|
313
|
+
* This code has been included by permission of the author.
|
314
|
+
*/
|
315
|
+
template <typename DType>
|
316
|
+
void hessenberg(const int nrows, void* a_elements) {
|
317
|
+
DType* a = reinterpret_cast<DType*>(a_elements);
|
318
|
+
DType* u = new DType[nrows]; // auxillary storage for the chosen vector
|
319
|
+
DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
|
320
|
+
int i, k, col;
|
321
|
+
|
322
|
+
// For each column use a Householder transformation to zero all entries
|
323
|
+
// below the subdiagonal.
|
324
|
+
for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
|
325
|
+
col++) {
|
326
|
+
// Calculate the signed square root of the sum of squares of the
|
327
|
+
// elements below the diagonal.
|
328
|
+
|
329
|
+
for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
|
330
|
+
p_a += nrows, i++) {
|
331
|
+
sum_of_squares += *p_a * *p_a;
|
332
|
+
}
|
333
|
+
if (sum_of_squares == 0.0) { continue; }
|
334
|
+
sum_of_squares = std::sqrt(sum_of_squares);
|
335
|
+
|
336
|
+
if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }
|
337
|
+
|
338
|
+
// Calculate the Householder transformation Q = I - 2uu'/u'u.
|
339
|
+
u[col + 1] = *psubdiag - sum_of_squares;
|
340
|
+
*psubdiag = sum_of_squares;
|
341
|
+
|
342
|
+
for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
|
343
|
+
u[i] = *p_a;
|
344
|
+
*p_a = 0.0;
|
345
|
+
}
|
346
|
+
|
347
|
+
// Premultiply A by Q
|
348
|
+
scale = -1.0 / (sum_of_squares * u[col+1]);
|
349
|
+
for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
|
350
|
+
p_a = a + nrows * (col + 1) + i;
|
351
|
+
for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
|
352
|
+
innerproduct += u[k] * *p_a;
|
353
|
+
}
|
354
|
+
innerproduct *= scale;
|
355
|
+
for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
|
356
|
+
*p_a -= u[k] * innerproduct;
|
357
|
+
}
|
358
|
+
}
|
359
|
+
|
360
|
+
// Postmultiply QA by Q
|
361
|
+
for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
|
362
|
+
for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
|
363
|
+
innerproduct += u[k] * *(p_row + k);
|
364
|
+
}
|
365
|
+
innerproduct *= scale;
|
366
|
+
|
367
|
+
for (k = col + 1; k < nrows; k++) {
|
368
|
+
*(p_row + k) -= u[k] * innerproduct;
|
369
|
+
}
|
370
|
+
}
|
371
|
+
}
|
372
|
+
|
373
|
+
delete[] u;
|
374
|
+
}
|
375
|
+
|
311
376
|
/*
|
312
377
|
* Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
|
313
378
|
*/
|
@@ -318,6 +383,10 @@ namespace nm {
|
|
318
383
|
|
319
384
|
if (M == 2) {
|
320
385
|
DType det = A[0] * A[lda+1] - A[1] * A[lda];
|
386
|
+
if (det == 0) {
|
387
|
+
rb_raise(nm_eNotInvertibleError,
|
388
|
+
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
|
389
|
+
}
|
321
390
|
B[0] = A[lda+1] / det;
|
322
391
|
B[1] = -A[1] / det;
|
323
392
|
B[ldb] = -A[lda] / det;
|
@@ -328,7 +397,8 @@ namespace nm {
|
|
328
397
|
DType det;
|
329
398
|
det_exact<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
|
330
399
|
if (det == 0) {
|
331
|
-
rb_raise(nm_eNotInvertibleError,
|
400
|
+
rb_raise(nm_eNotInvertibleError,
|
401
|
+
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
|
332
402
|
}
|
333
403
|
|
334
404
|
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
|
@@ -347,22 +417,6 @@ namespace nm {
|
|
347
417
|
}
|
348
418
|
}
|
349
419
|
|
350
|
-
/*
|
351
|
-
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
352
|
-
*/
|
353
|
-
template <typename DType, typename CType>
|
354
|
-
inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
|
355
|
-
return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
|
356
|
-
}
|
357
|
-
|
358
|
-
/*
|
359
|
-
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
360
|
-
*/
|
361
|
-
template <typename DType, typename CType>
|
362
|
-
inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
|
363
|
-
return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
|
364
|
-
}
|
365
|
-
|
366
420
|
/*
|
367
421
|
* Function signature conversion for calling CBLAS' gemm functions as directly as possible.
|
368
422
|
*
|
@@ -422,39 +476,6 @@ namespace nm {
|
|
422
476
|
reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
|
423
477
|
}
|
424
478
|
|
425
|
-
|
426
|
-
/*
|
427
|
-
* Function signature conversion for calling CBLAS' trmm functions as directly as possible.
|
428
|
-
*
|
429
|
-
* For documentation: http://www.netlib.org/blas/dtrmm.f
|
430
|
-
*/
|
431
|
-
template <typename DType>
|
432
|
-
inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
433
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
|
434
|
-
const void* A, const int lda, void* B, const int ldb)
|
435
|
-
{
|
436
|
-
trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
|
437
|
-
reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
|
438
|
-
}
|
439
|
-
|
440
|
-
|
441
|
-
/*
|
442
|
-
* Function signature conversion for calling CBLAS' syrk functions as directly as possible.
|
443
|
-
*
|
444
|
-
* For documentation: http://www.netlib.org/blas/dsyrk.f
|
445
|
-
*/
|
446
|
-
template <typename DType>
|
447
|
-
inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
|
448
|
-
const int n, const int k, const void* alpha,
|
449
|
-
const void* A, const int lda, const void* beta, void* C, const int ldc)
|
450
|
-
{
|
451
|
-
syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
|
452
|
-
reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
|
453
|
-
}
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
479
|
}
|
459
480
|
} // end of namespace nm::math
|
460
481
|
|
@@ -466,80 +487,29 @@ extern "C" {
|
|
466
487
|
///////////////////
|
467
488
|
|
468
489
|
void nm_math_init_blas() {
|
469
|
-
|
490
|
+
VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");
|
470
491
|
|
471
492
|
rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);
|
472
493
|
|
473
|
-
|
474
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
475
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
|
476
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
477
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
|
478
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
|
479
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
|
480
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
|
481
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
|
482
|
-
|
483
|
-
/* Non-ATLAS regular LAPACK Functions called via Fortran interface */
|
484
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
|
485
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
|
486
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
|
487
|
-
|
488
|
-
cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
|
489
|
-
|
490
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
|
491
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
|
492
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
|
493
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
|
494
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
|
495
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
|
496
|
-
|
497
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
|
498
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
|
499
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
|
500
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
|
501
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
|
502
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
|
503
|
-
}
|
504
|
-
|
505
|
-
/*
|
506
|
-
* Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
|
507
|
-
*
|
508
|
-
* Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
|
509
|
-
*/
|
510
|
-
static inline char lapack_svd_job_sym(VALUE op) {
|
511
|
-
if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
|
512
|
-
else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
|
513
|
-
else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
|
514
|
-
else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
|
515
|
-
else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
|
516
|
-
return 'a';
|
517
|
-
}
|
494
|
+
VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");
|
518
495
|
|
496
|
+
/* ATLAS-CLAPACK Functions that are implemented internally */
|
497
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
498
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
499
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
|
519
500
|
|
520
|
-
|
521
|
-
* Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
|
522
|
-
*
|
523
|
-
* Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
|
524
|
-
*/
|
525
|
-
static inline char lapack_evd_job_sym(VALUE op) {
|
526
|
-
if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
|
527
|
-
else return 'V';
|
528
|
-
}
|
501
|
+
VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");
|
529
502
|
|
503
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
|
504
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
|
505
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
|
506
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
|
507
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
|
508
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
|
530
509
|
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
* Called by nm_cblas_gemm -- basically inline.
|
535
|
-
*
|
536
|
-
*/
|
537
|
-
static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
|
538
|
-
if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
|
539
|
-
else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
|
540
|
-
else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
|
541
|
-
else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
|
542
|
-
return CblasNoTrans;
|
510
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
|
511
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
|
512
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
|
543
513
|
}
|
544
514
|
|
545
515
|
/*
|
@@ -572,55 +542,6 @@ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE
|
|
572
542
|
return vector;
|
573
543
|
}
|
574
544
|
|
575
|
-
/*
|
576
|
-
* Interprets cblas argument which could be :left or :right
|
577
|
-
*
|
578
|
-
* Called by nm_cblas_trsm -- basically inline
|
579
|
-
*/
|
580
|
-
static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
|
581
|
-
ID op_id = rb_to_id(op);
|
582
|
-
if (op_id == nm_rb_left) return CblasLeft;
|
583
|
-
if (op_id == nm_rb_right) return CblasRight;
|
584
|
-
rb_raise(rb_eArgError, "Expected :left or :right for side argument");
|
585
|
-
return CblasLeft;
|
586
|
-
}
|
587
|
-
|
588
|
-
/*
|
589
|
-
* Interprets cblas argument which could be :upper or :lower
|
590
|
-
*
|
591
|
-
* Called by nm_cblas_trsm -- basically inline
|
592
|
-
*/
|
593
|
-
static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
|
594
|
-
ID op_id = rb_to_id(op);
|
595
|
-
if (op_id == nm_rb_upper) return CblasUpper;
|
596
|
-
if (op_id == nm_rb_lower) return CblasLower;
|
597
|
-
rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
|
598
|
-
return CblasUpper;
|
599
|
-
}
|
600
|
-
|
601
|
-
|
602
|
-
/*
|
603
|
-
* Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
|
604
|
-
*
|
605
|
-
* Called by nm_cblas_trsm -- basically inline
|
606
|
-
*/
|
607
|
-
static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
|
608
|
-
if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
|
609
|
-
return CblasNonUnit;
|
610
|
-
}
|
611
|
-
|
612
|
-
/*
|
613
|
-
* Interprets cblas argument which could be :row or :col
|
614
|
-
*/
|
615
|
-
static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
616
|
-
if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
|
617
|
-
else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
|
618
|
-
rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
|
619
|
-
rb_raise(rb_eArgError, "Expected :row or :col for order argument");
|
620
|
-
return CblasRowMajor;
|
621
|
-
}
|
622
|
-
|
623
|
-
|
624
545
|
/*
|
625
546
|
* Call any of the cblas_xrotg functions as directly as possible.
|
626
547
|
*
|
@@ -641,10 +562,6 @@ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
|
641
562
|
* The outputs [c,s] will be returned in a Ruby Array at the end; the input
|
642
563
|
* NMatrix will also be modified in-place.
|
643
564
|
*
|
644
|
-
* If you provide rationals, be aware that there's a high probability of an
|
645
|
-
* error, since rotg includes a square root -- and most rationals' square roots
|
646
|
-
* are irrational. You're better off converting to Float first.
|
647
|
-
*
|
648
565
|
* This function, like the other cblas_ functions, does minimal type-checking.
|
649
566
|
*/
|
650
567
|
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
@@ -654,14 +571,13 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
|
654
571
|
nm::math::cblas_rotg<double>,
|
655
572
|
nm::math::cblas_rotg<nm::Complex64>,
|
656
573
|
nm::math::cblas_rotg<nm::Complex128>,
|
657
|
-
NULL, NULL, NULL, // no rationals
|
658
574
|
NULL //nm::math::cblas_rotg<nm::RubyObject>
|
659
575
|
};
|
660
576
|
|
661
577
|
nm::dtype_t dtype = NM_DTYPE(ab);
|
662
578
|
|
663
579
|
if (!ttable[dtype]) {
|
664
|
-
rb_raise(nm_eDataTypeError, "this operation undefined for integer
|
580
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
665
581
|
return Qnil;
|
666
582
|
|
667
583
|
} else {
|
@@ -723,9 +639,6 @@ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VAL
|
|
723
639
|
nm::math::cblas_rot<double,double>,
|
724
640
|
nm::math::cblas_rot<nm::Complex64,float>,
|
725
641
|
nm::math::cblas_rot<nm::Complex128,double>,
|
726
|
-
nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
|
727
|
-
nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
|
728
|
-
nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
|
729
642
|
nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
|
730
643
|
};
|
731
644
|
|
@@ -792,17 +705,13 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
792
705
|
nm::math::cblas_nrm2<float64_t,float64_t>,
|
793
706
|
nm::math::cblas_nrm2<float32_t,nm::Complex64>,
|
794
707
|
nm::math::cblas_nrm2<float64_t,nm::Complex128>,
|
795
|
-
//nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
|
796
|
-
//nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
|
797
|
-
//nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
|
798
|
-
NULL, NULL, NULL,
|
799
708
|
nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
|
800
709
|
};
|
801
710
|
|
802
711
|
nm::dtype_t dtype = NM_DTYPE(x);
|
803
712
|
|
804
713
|
if (!ttable[dtype]) {
|
805
|
-
rb_raise(nm_eDataTypeError, "this operation undefined for integer
|
714
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
806
715
|
return Qnil;
|
807
716
|
|
808
717
|
} else {
|
@@ -850,9 +759,6 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
850
759
|
nm::math::cblas_asum<float64_t,float64_t>,
|
851
760
|
nm::math::cblas_asum<float32_t,nm::Complex64>,
|
852
761
|
nm::math::cblas_asum<float64_t,nm::Complex128>,
|
853
|
-
nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
|
854
|
-
nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
|
855
|
-
nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
|
856
762
|
nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
|
857
763
|
};
|
858
764
|
|
@@ -1000,10 +906,8 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
1000
906
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1001
907
|
nm::math::cblas_trsm<float>,
|
1002
908
|
nm::math::cblas_trsm<double>,
|
1003
|
-
|
1004
|
-
nm::math::cblas_trsm<nm::
|
1005
|
-
nm::math::cblas_trsm<nm::Rational64>,
|
1006
|
-
nm::math::cblas_trsm<nm::Rational128>,
|
909
|
+
nm::math::cblas_trsm<nm::Complex64>,
|
910
|
+
nm::math::cblas_trsm<nm::Complex128>,
|
1007
911
|
nm::math::cblas_trsm<nm::RubyObject>
|
1008
912
|
};
|
1009
913
|
|
@@ -1021,358 +925,6 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
1021
925
|
return Qtrue;
|
1022
926
|
}
|
1023
927
|
|
1024
|
-
|
1025
|
-
static VALUE nm_cblas_trmm(VALUE self,
|
1026
|
-
VALUE order,
|
1027
|
-
VALUE side, VALUE uplo,
|
1028
|
-
VALUE trans_a, VALUE diag,
|
1029
|
-
VALUE m, VALUE n,
|
1030
|
-
VALUE alpha,
|
1031
|
-
VALUE a, VALUE lda,
|
1032
|
-
VALUE b, VALUE ldb)
|
1033
|
-
{
|
1034
|
-
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
|
1035
|
-
const enum CBLAS_SIDE, const enum CBLAS_UPLO,
|
1036
|
-
const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
|
1037
|
-
const int m, const int n, const void* alpha, const void* a,
|
1038
|
-
const int lda, void* b, const int ldb) = {
|
1039
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1040
|
-
nm::math::cblas_trmm<float>,
|
1041
|
-
nm::math::cblas_trmm<double>,
|
1042
|
-
cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
|
1043
|
-
/*
|
1044
|
-
nm::math::cblas_trmm<nm::Rational32>,
|
1045
|
-
nm::math::cblas_trmm<nm::Rational64>,
|
1046
|
-
nm::math::cblas_trmm<nm::Rational128>,
|
1047
|
-
nm::math::cblas_trmm<nm::RubyObject>*/
|
1048
|
-
};
|
1049
|
-
|
1050
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1051
|
-
|
1052
|
-
if (!ttable[dtype]) {
|
1053
|
-
rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
|
1054
|
-
} else {
|
1055
|
-
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
1056
|
-
rubyval_to_cval(alpha, dtype, pAlpha);
|
1057
|
-
|
1058
|
-
ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
1059
|
-
}
|
1060
|
-
|
1061
|
-
return b;
|
1062
|
-
}
|
1063
|
-
|
1064
|
-
|
1065
|
-
static VALUE nm_cblas_syrk(VALUE self,
|
1066
|
-
VALUE order,
|
1067
|
-
VALUE uplo,
|
1068
|
-
VALUE trans,
|
1069
|
-
VALUE n, VALUE k,
|
1070
|
-
VALUE alpha,
|
1071
|
-
VALUE a, VALUE lda,
|
1072
|
-
VALUE beta,
|
1073
|
-
VALUE c, VALUE ldc)
|
1074
|
-
{
|
1075
|
-
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
|
1076
|
-
const int n, const int k, const void* alpha, const void* a,
|
1077
|
-
const int lda, const void* beta, void* c, const int ldc) = {
|
1078
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1079
|
-
nm::math::cblas_syrk<float>,
|
1080
|
-
nm::math::cblas_syrk<double>,
|
1081
|
-
cblas_csyrk, cblas_zsyrk// call directly, same function signature!
|
1082
|
-
/*nm::math::cblas_trsm<nm::Rational32>,
|
1083
|
-
nm::math::cblas_trsm<nm::Rational64>,
|
1084
|
-
nm::math::cblas_trsm<nm::Rational128>,
|
1085
|
-
nm::math::cblas_trsm<nm::RubyObject>*/
|
1086
|
-
};
|
1087
|
-
|
1088
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1089
|
-
|
1090
|
-
if (!ttable[dtype]) {
|
1091
|
-
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1092
|
-
} else {
|
1093
|
-
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
1094
|
-
*pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
1095
|
-
rubyval_to_cval(alpha, dtype, pAlpha);
|
1096
|
-
rubyval_to_cval(beta, dtype, pBeta);
|
1097
|
-
|
1098
|
-
ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
1099
|
-
}
|
1100
|
-
|
1101
|
-
return Qtrue;
|
1102
|
-
}
|
1103
|
-
|
1104
|
-
|
1105
|
-
static VALUE nm_cblas_herk(VALUE self,
|
1106
|
-
VALUE order,
|
1107
|
-
VALUE uplo,
|
1108
|
-
VALUE trans,
|
1109
|
-
VALUE n, VALUE k,
|
1110
|
-
VALUE alpha,
|
1111
|
-
VALUE a, VALUE lda,
|
1112
|
-
VALUE beta,
|
1113
|
-
VALUE c, VALUE ldc)
|
1114
|
-
{
|
1115
|
-
|
1116
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1117
|
-
|
1118
|
-
if (dtype == nm::COMPLEX64) {
|
1119
|
-
cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
1120
|
-
} else if (dtype == nm::COMPLEX128) {
|
1121
|
-
cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
1122
|
-
} else
|
1123
|
-
rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
|
1124
|
-
return Qtrue;
|
1125
|
-
}
|
1126
|
-
|
1127
|
-
|
1128
|
-
/*
|
1129
|
-
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
1130
|
-
*
|
1131
|
-
* xGESVD computes the singular value decomposition (SVD) of a real
|
1132
|
-
* M-by-N matrix A, optionally computing the left and/or right singular
|
1133
|
-
* vectors. The SVD is written
|
1134
|
-
*
|
1135
|
-
* A = U * SIGMA * transpose(V)
|
1136
|
-
*
|
1137
|
-
* where SIGMA is an M-by-N matrix which is zero except for its
|
1138
|
-
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
|
1139
|
-
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
|
1140
|
-
* are the singular values of A; they are real and non-negative, and
|
1141
|
-
* are returned in descending order. The first min(m,n) columns of
|
1142
|
-
* U and V are the left and right singular vectors of A.
|
1143
|
-
*
|
1144
|
-
* Note that the routine returns V**T, not V.
|
1145
|
-
*/
|
1146
|
-
static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
|
1147
|
-
static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
|
1148
|
-
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1149
|
-
nm::math::lapack_gesvd<float,float>,
|
1150
|
-
nm::math::lapack_gesvd<double,double>,
|
1151
|
-
nm::math::lapack_gesvd<nm::Complex64,float>,
|
1152
|
-
nm::math::lapack_gesvd<nm::Complex128,double>,
|
1153
|
-
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1154
|
-
};
|
1155
|
-
|
1156
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1157
|
-
|
1158
|
-
|
1159
|
-
if (!gesvd_table[dtype]) {
|
1160
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1161
|
-
return Qfalse;
|
1162
|
-
} else {
|
1163
|
-
int M = FIX2INT(m),
|
1164
|
-
N = FIX2INT(n);
|
1165
|
-
|
1166
|
-
int min_mn = NM_MIN(M,N);
|
1167
|
-
int max_mn = NM_MAX(M,N);
|
1168
|
-
|
1169
|
-
char JOBU = lapack_svd_job_sym(jobu),
|
1170
|
-
JOBVT = lapack_svd_job_sym(jobvt);
|
1171
|
-
|
1172
|
-
// only need rwork for complex matrices
|
1173
|
-
int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
|
1174
|
-
void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
|
1175
|
-
int work_size = FIX2INT(lwork);
|
1176
|
-
|
1177
|
-
// ignore user argument for lwork if it's too small.
|
1178
|
-
work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
|
1179
|
-
void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
|
1180
|
-
|
1181
|
-
int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1182
|
-
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
|
1183
|
-
work, work_size, rwork);
|
1184
|
-
return INT2FIX(info);
|
1185
|
-
}
|
1186
|
-
}
|
1187
|
-
|
1188
|
-
/*
|
1189
|
-
* Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
|
1190
|
-
*
|
1191
|
-
* xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
|
1192
|
-
* M-by-N matrix A, optionally computing the left and/or right singular
|
1193
|
-
* vectors. The SVD is written
|
1194
|
-
*
|
1195
|
-
* A = U * SIGMA * transpose(V)
|
1196
|
-
*
|
1197
|
-
* where SIGMA is an M-by-N matrix which is zero except for its
|
1198
|
-
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
|
1199
|
-
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
|
1200
|
-
* are the singular values of A; they are real and non-negative, and
|
1201
|
-
* are returned in descending order. The first min(m,n) columns of
|
1202
|
-
* U and V are the left and right singular vectors of A.
|
1203
|
-
*
|
1204
|
-
* Note that the routine returns V**T, not V.
|
1205
|
-
*/
|
1206
|
-
static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
|
1207
|
-
static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
|
1208
|
-
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1209
|
-
nm::math::lapack_gesdd<float,float>,
|
1210
|
-
nm::math::lapack_gesdd<double,double>,
|
1211
|
-
nm::math::lapack_gesdd<nm::Complex64,float>,
|
1212
|
-
nm::math::lapack_gesdd<nm::Complex128,double>,
|
1213
|
-
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1214
|
-
};
|
1215
|
-
|
1216
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1217
|
-
|
1218
|
-
if (!gesdd_table[dtype]) {
|
1219
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1220
|
-
return Qfalse;
|
1221
|
-
} else {
|
1222
|
-
int M = FIX2INT(m),
|
1223
|
-
N = FIX2INT(n);
|
1224
|
-
|
1225
|
-
int min_mn = NM_MIN(M,N);
|
1226
|
-
int max_mn = NM_MAX(M,N);
|
1227
|
-
|
1228
|
-
char JOBZ = lapack_svd_job_sym(jobz);
|
1229
|
-
|
1230
|
-
// only need rwork for complex matrices
|
1231
|
-
void* rwork = NULL;
|
1232
|
-
|
1233
|
-
int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
|
1234
|
-
if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
|
1235
|
-
int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
|
1236
|
-
rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
|
1237
|
-
|
1238
|
-
if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
|
1239
|
-
else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
|
1240
|
-
else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
|
1241
|
-
} else {
|
1242
|
-
if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
|
1243
|
-
else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
|
1244
|
-
else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
|
1245
|
-
}
|
1246
|
-
void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
|
1247
|
-
int* iwork = NM_ALLOCA_N(int, 8*min_mn);
|
1248
|
-
|
1249
|
-
int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1250
|
-
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
|
1251
|
-
work, work_size, iwork, rwork);
|
1252
|
-
return INT2FIX(info);
|
1253
|
-
}
|
1254
|
-
}
|
1255
|
-
|
1256
|
-
|
1257
|
-
/*
|
1258
|
-
* Function signature conversion for calling CBLAS' geev functions as directly as possible.
|
1259
|
-
*
|
1260
|
-
* GEEV computes for an N-by-N real nonsymmetric matrix A, the
|
1261
|
-
* eigenvalues and, optionally, the left and/or right eigenvectors.
|
1262
|
-
*
|
1263
|
-
* The right eigenvector v(j) of A satisfies
|
1264
|
-
* A * v(j) = lambda(j) * v(j)
|
1265
|
-
* where lambda(j) is its eigenvalue.
|
1266
|
-
*
|
1267
|
-
* The left eigenvector u(j) of A satisfies
|
1268
|
-
* u(j)**H * A = lambda(j) * u(j)**H
|
1269
|
-
* where u(j)**H denotes the conjugate transpose of u(j).
|
1270
|
-
*
|
1271
|
-
* The computed eigenvectors are normalized to have Euclidean norm
|
1272
|
-
* equal to 1 and largest component real.
|
1273
|
-
*/
|
1274
|
-
static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
|
1275
|
-
static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
|
1276
|
-
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1277
|
-
nm::math::lapack_geev<float,float>,
|
1278
|
-
nm::math::lapack_geev<double,double>,
|
1279
|
-
nm::math::lapack_geev<nm::Complex64,float>,
|
1280
|
-
nm::math::lapack_geev<nm::Complex128,double>,
|
1281
|
-
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1282
|
-
};
|
1283
|
-
|
1284
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1285
|
-
|
1286
|
-
|
1287
|
-
if (!geev_table[dtype]) {
|
1288
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1289
|
-
return Qfalse;
|
1290
|
-
} else {
|
1291
|
-
int N = FIX2INT(n);
|
1292
|
-
|
1293
|
-
char JOBVL = lapack_evd_job_sym(compute_left),
|
1294
|
-
JOBVR = lapack_evd_job_sym(compute_right);
|
1295
|
-
|
1296
|
-
void* A = NM_STORAGE_DENSE(a)->elements;
|
1297
|
-
void* WR = NM_STORAGE_DENSE(w)->elements;
|
1298
|
-
void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
|
1299
|
-
void* VL = NM_STORAGE_DENSE(vl)->elements;
|
1300
|
-
void* VR = NM_STORAGE_DENSE(vr)->elements;
|
1301
|
-
|
1302
|
-
// only need rwork for complex matrices (wi == Qnil for complex)
|
1303
|
-
int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
|
1304
|
-
void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
|
1305
|
-
int work_size = FIX2INT(lwork);
|
1306
|
-
void* work;
|
1307
|
-
|
1308
|
-
int info;
|
1309
|
-
|
1310
|
-
// if work size is 0 or -1, query.
|
1311
|
-
if (work_size <= 0) {
|
1312
|
-
work_size = -1;
|
1313
|
-
work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
|
1314
|
-
info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
|
1315
|
-
work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
|
1316
|
-
// line above is basically: work_size = (int)(work[0]); // now have new work_size
|
1317
|
-
NM_FREE(work);
|
1318
|
-
if (info == 0)
|
1319
|
-
rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
|
1320
|
-
else return INT2FIX(info); // error of some kind on query!
|
1321
|
-
}
|
1322
|
-
|
1323
|
-
// if work size is < 2*N, just set it to 2*N
|
1324
|
-
if (work_size < 2*N) work_size = 2*N;
|
1325
|
-
if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
|
1326
|
-
work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
|
1327
|
-
}
|
1328
|
-
|
1329
|
-
// Allocate work array for actual run
|
1330
|
-
work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
|
1331
|
-
|
1332
|
-
// Perform the actual calculation.
|
1333
|
-
info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
|
1334
|
-
|
1335
|
-
return INT2FIX(info);
|
1336
|
-
}
|
1337
|
-
}
|
1338
|
-
|
1339
|
-
|
1340
|
-
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1341
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1342
|
-
/*nm::math::clapack_lauum<uint8_t, false>,
|
1343
|
-
nm::math::clapack_lauum<int8_t, false>,
|
1344
|
-
nm::math::clapack_lauum<int16_t, false>,
|
1345
|
-
nm::math::clapack_lauum<uint32_t, false>,
|
1346
|
-
nm::math::clapack_lauum<uint64_t, false>,*/
|
1347
|
-
NULL, NULL, NULL, NULL, NULL,
|
1348
|
-
nm::math::clapack_lauum<false, float>,
|
1349
|
-
nm::math::clapack_lauum<false, double>,
|
1350
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1351
|
-
clapack_clauum, clapack_zlauum, // call directly, same function signature!
|
1352
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1353
|
-
nm::math::clapack_lauum<true, nm::Complex64>,
|
1354
|
-
nm::math::clapack_lauum<true, nm::Complex128>,
|
1355
|
-
#endif
|
1356
|
-
/*
|
1357
|
-
nm::math::clapack_lauum<nm::Rational32, false>,
|
1358
|
-
nm::math::clapack_lauum<nm::Rational64, false>,
|
1359
|
-
nm::math::clapack_lauum<nm::Rational128, false>,
|
1360
|
-
nm::math::clapack_lauum<nm::RubyObject, false>
|
1361
|
-
|
1362
|
-
*/
|
1363
|
-
};
|
1364
|
-
|
1365
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1366
|
-
rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
|
1367
|
-
} else {
|
1368
|
-
// Call either our version of lauum or the LAPACK version.
|
1369
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1370
|
-
}
|
1371
|
-
|
1372
|
-
return a;
|
1373
|
-
}
|
1374
|
-
|
1375
|
-
|
1376
928
|
/* Call any of the clapack_xgetrf functions as directly as possible.
|
1377
929
|
*
|
1378
930
|
* The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
|
@@ -1402,15 +954,8 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
|
|
1402
954
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1403
955
|
nm::math::clapack_getrf<float>,
|
1404
956
|
nm::math::clapack_getrf<double>,
|
1405
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1406
|
-
clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
|
1407
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1408
957
|
nm::math::clapack_getrf<nm::Complex64>,
|
1409
958
|
nm::math::clapack_getrf<nm::Complex128>,
|
1410
|
-
#endif
|
1411
|
-
nm::math::clapack_getrf<nm::Rational32>,
|
1412
|
-
nm::math::clapack_getrf<nm::Rational64>,
|
1413
|
-
nm::math::clapack_getrf<nm::Rational128>,
|
1414
959
|
nm::math::clapack_getrf<nm::RubyObject>
|
1415
960
|
};
|
1416
961
|
|
@@ -1438,51 +983,6 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
|
|
1438
983
|
}
|
1439
984
|
|
1440
985
|
|
1441
|
-
/* Call any of the clapack_xpotrf functions as directly as possible.
|
1442
|
-
*
|
1443
|
-
* You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
|
1444
|
-
* with its arguments?
|
1445
|
-
*
|
1446
|
-
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1447
|
-
* handling, so you can easily crash Ruby!
|
1448
|
-
*
|
1449
|
-
* Returns an array giving the pivot indices (normally these are argument #5).
|
1450
|
-
*/
|
1451
|
-
static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1452
|
-
#if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
|
1453
|
-
rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
|
1454
|
-
#endif
|
1455
|
-
|
1456
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1457
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1458
|
-
nm::math::clapack_potrf<float>,
|
1459
|
-
nm::math::clapack_potrf<double>,
|
1460
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1461
|
-
clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
|
1462
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1463
|
-
nm::math::clapack_potrf<nm::Complex64>,
|
1464
|
-
nm::math::clapack_potrf<nm::Complex128>,
|
1465
|
-
#endif
|
1466
|
-
NULL, NULL, NULL, NULL /*
|
1467
|
-
nm::math::clapack_potrf<nm::Rational32>,
|
1468
|
-
nm::math::clapack_potrf<nm::Rational64>,
|
1469
|
-
nm::math::clapack_potrf<nm::Rational128>,
|
1470
|
-
nm::math::clapack_potrf<nm::RubyObject> */
|
1471
|
-
};
|
1472
|
-
|
1473
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1474
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1475
|
-
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
1476
|
-
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1477
|
-
} else {
|
1478
|
-
// Call either our version of potrf or the LAPACK version.
|
1479
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1480
|
-
}
|
1481
|
-
|
1482
|
-
return a;
|
1483
|
-
}
|
1484
|
-
|
1485
|
-
|
1486
986
|
/*
|
1487
987
|
* Call any of the clapack_xgetrs functions as directly as possible.
|
1488
988
|
*/
|
@@ -1493,15 +993,8 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
|
|
1493
993
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1494
994
|
nm::math::clapack_getrs<float>,
|
1495
995
|
nm::math::clapack_getrs<double>,
|
1496
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1497
|
-
clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
|
1498
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1499
996
|
nm::math::clapack_getrs<nm::Complex64>,
|
1500
997
|
nm::math::clapack_getrs<nm::Complex128>,
|
1501
|
-
#endif
|
1502
|
-
nm::math::clapack_getrs<nm::Rational32>,
|
1503
|
-
nm::math::clapack_getrs<nm::Rational64>,
|
1504
|
-
nm::math::clapack_getrs<nm::Rational128>,
|
1505
998
|
nm::math::clapack_getrs<nm::RubyObject>
|
1506
999
|
};
|
1507
1000
|
|
@@ -1529,157 +1022,14 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
|
|
1529
1022
|
return b;
|
1530
1023
|
}
|
1531
1024
|
|
1532
|
-
|
1533
|
-
/*
|
1534
|
-
* Call any of the clapack_xpotrs functions as directly as possible.
|
1535
|
-
*/
|
1536
|
-
static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
|
1537
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
|
1538
|
-
const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
|
1539
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1540
|
-
nm::math::clapack_potrs<float,false>,
|
1541
|
-
nm::math::clapack_potrs<double,false>,
|
1542
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1543
|
-
clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
|
1544
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1545
|
-
nm::math::clapack_potrs<nm::Complex64,true>,
|
1546
|
-
nm::math::clapack_potrs<nm::Complex128,true>,
|
1547
|
-
#endif
|
1548
|
-
nm::math::clapack_potrs<nm::Rational32,false>,
|
1549
|
-
nm::math::clapack_potrs<nm::Rational64,false>,
|
1550
|
-
nm::math::clapack_potrs<nm::Rational128,false>,
|
1551
|
-
nm::math::clapack_potrs<nm::RubyObject,false>
|
1552
|
-
};
|
1553
|
-
|
1554
|
-
|
1555
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1556
|
-
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1557
|
-
} else {
|
1558
|
-
|
1559
|
-
// Call either our version of potrs or the LAPACK version.
|
1560
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1561
|
-
NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
1562
|
-
}
|
1563
|
-
|
1564
|
-
// b is both returned and modified directly in the argument list.
|
1565
|
-
return b;
|
1566
|
-
}
|
1567
|
-
|
1568
|
-
|
1569
1025
|
/*
|
1570
1026
|
* Simple way to check from within Ruby code if clapack functions are available, without
|
1571
1027
|
* having to wait around for an exception to be thrown.
|
1572
1028
|
*/
|
1573
1029
|
static VALUE nm_has_clapack(VALUE self) {
|
1574
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1575
1030
|
return Qfalse;
|
1576
|
-
#else
|
1577
|
-
return Qtrue;
|
1578
|
-
#endif
|
1579
1031
|
}
|
1580
1032
|
|
1581
|
-
|
1582
|
-
/* Call any of the clapack_xgetri functions as directly as possible.
|
1583
|
-
*
|
1584
|
-
* You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
|
1585
|
-
* with its arguments?
|
1586
|
-
*
|
1587
|
-
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1588
|
-
* handling, so you can easily crash Ruby!
|
1589
|
-
*
|
1590
|
-
* Returns an array giving the pivot indices (normally these are argument #5).
|
1591
|
-
*/
|
1592
|
-
static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
|
1593
|
-
#if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
|
1594
|
-
rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
|
1595
|
-
#endif
|
1596
|
-
|
1597
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
|
1598
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1599
|
-
nm::math::clapack_getri<float>,
|
1600
|
-
nm::math::clapack_getri<double>,
|
1601
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1602
|
-
clapack_cgetri, clapack_zgetri, // call directly, same function signature!
|
1603
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1604
|
-
nm::math::clapack_getri<nm::Complex64>,
|
1605
|
-
nm::math::clapack_getri<nm::Complex128>,
|
1606
|
-
#endif
|
1607
|
-
NULL, NULL, NULL, NULL /*
|
1608
|
-
nm::math::clapack_getri<nm::Rational32>,
|
1609
|
-
nm::math::clapack_getri<nm::Rational64>,
|
1610
|
-
nm::math::clapack_getri<nm::Rational128>,
|
1611
|
-
nm::math::clapack_getri<nm::RubyObject> */
|
1612
|
-
};
|
1613
|
-
|
1614
|
-
// Allocate the C version of the pivot index array
|
1615
|
-
int* ipiv_;
|
1616
|
-
if (TYPE(ipiv) != T_ARRAY) {
|
1617
|
-
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
1618
|
-
} else {
|
1619
|
-
ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
|
1620
|
-
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
1621
|
-
ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
|
1622
|
-
}
|
1623
|
-
}
|
1624
|
-
|
1625
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1626
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1627
|
-
// FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
|
1628
|
-
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1629
|
-
} else {
|
1630
|
-
// Call either our version of getri or the LAPACK version.
|
1631
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
|
1632
|
-
}
|
1633
|
-
|
1634
|
-
return a;
|
1635
|
-
}
|
1636
|
-
|
1637
|
-
|
1638
|
-
/* Call any of the clapack_xpotri functions as directly as possible.
|
1639
|
-
*
|
1640
|
-
* You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
|
1641
|
-
* with its arguments?
|
1642
|
-
*
|
1643
|
-
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1644
|
-
* handling, so you can easily crash Ruby!
|
1645
|
-
*
|
1646
|
-
* Returns an array giving the pivot indices (normally these are argument #5).
|
1647
|
-
*/
|
1648
|
-
static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1649
|
-
#if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
|
1650
|
-
rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
|
1651
|
-
#endif
|
1652
|
-
|
1653
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1654
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1655
|
-
nm::math::clapack_potri<float>,
|
1656
|
-
nm::math::clapack_potri<double>,
|
1657
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1658
|
-
clapack_cpotri, clapack_zpotri, // call directly, same function signature!
|
1659
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1660
|
-
nm::math::clapack_potri<nm::Complex64>,
|
1661
|
-
nm::math::clapack_potri<nm::Complex128>,
|
1662
|
-
#endif
|
1663
|
-
NULL, NULL, NULL, NULL /*
|
1664
|
-
nm::math::clapack_getri<nm::Rational32>,
|
1665
|
-
nm::math::clapack_getri<nm::Rational64>,
|
1666
|
-
nm::math::clapack_getri<nm::Rational128>,
|
1667
|
-
nm::math::clapack_getri<nm::RubyObject> */
|
1668
|
-
};
|
1669
|
-
|
1670
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1671
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1672
|
-
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
1673
|
-
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1674
|
-
} else {
|
1675
|
-
// Call either our version of getri or the LAPACK version.
|
1676
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1677
|
-
}
|
1678
|
-
|
1679
|
-
return a;
|
1680
|
-
}
|
1681
|
-
|
1682
|
-
|
1683
1033
|
/*
|
1684
1034
|
* Call any of the clapack_xlaswp functions as directly as possible.
|
1685
1035
|
*
|
@@ -1695,15 +1045,8 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
|
|
1695
1045
|
nm::math::clapack_laswp<int64_t>,
|
1696
1046
|
nm::math::clapack_laswp<float>,
|
1697
1047
|
nm::math::clapack_laswp<double>,
|
1698
|
-
//#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
|
1699
|
-
// clapack_claswp, clapack_zlaswp, // call directly, same function signature!
|
1700
|
-
//#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1701
1048
|
nm::math::clapack_laswp<nm::Complex64>,
|
1702
1049
|
nm::math::clapack_laswp<nm::Complex128>,
|
1703
|
-
//#endif
|
1704
|
-
nm::math::clapack_laswp<nm::Rational32>,
|
1705
|
-
nm::math::clapack_laswp<nm::Rational64>,
|
1706
|
-
nm::math::clapack_laswp<nm::Rational128>,
|
1707
1050
|
nm::math::clapack_laswp<nm::RubyObject>
|
1708
1051
|
};
|
1709
1052
|
|
@@ -1735,6 +1078,20 @@ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dty
|
|
1735
1078
|
ttable[dtype](M, elements, lda, result);
|
1736
1079
|
}
|
1737
1080
|
|
1081
|
+
/*
|
1082
|
+
* C accessor for reducing a matrix to hessenberg form.
|
1083
|
+
*/
|
1084
|
+
void nm_math_hessenberg(VALUE a) {
|
1085
|
+
static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
|
1086
|
+
NULL, NULL, NULL, NULL, NULL, // does not support ints
|
1087
|
+
nm::math::hessenberg<float>,
|
1088
|
+
nm::math::hessenberg<double>,
|
1089
|
+
NULL, NULL, // does not support Complex
|
1090
|
+
NULL // no support for Ruby Object
|
1091
|
+
};
|
1092
|
+
|
1093
|
+
ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
|
1094
|
+
}
|
1738
1095
|
/*
|
1739
1096
|
* C accessor for calculating an in-place inverse.
|
1740
1097
|
*/
|