nmatrix 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +20 -55
- data/ext/nmatrix/data/data.cpp +11 -44
- data/ext/nmatrix/data/data.h +174 -311
- data/ext/nmatrix/data/meta.h +1 -7
- data/ext/nmatrix/data/ruby_object.h +3 -85
- data/ext/nmatrix/extconf.rb +2 -73
- data/ext/nmatrix/math.cpp +170 -813
- data/ext/nmatrix/math/asum.h +2 -25
- data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +2 -32
- data/ext/nmatrix/math/gemv.h +1 -35
- data/ext/nmatrix/math/getrf.h +21 -6
- data/ext/nmatrix/math/getrs.h +0 -8
- data/ext/nmatrix/math/imax.h +0 -22
- data/ext/nmatrix/math/long_dtype.h +0 -3
- data/ext/nmatrix/math/math.h +11 -337
- data/ext/nmatrix/math/nrm2.h +2 -23
- data/ext/nmatrix/math/rot.h +1 -25
- data/ext/nmatrix/math/rotg.h +4 -13
- data/ext/nmatrix/math/scal.h +0 -22
- data/ext/nmatrix/math/trsm.h +0 -55
- data/ext/nmatrix/math/util.h +148 -0
- data/ext/nmatrix/nmatrix.cpp +0 -14
- data/ext/nmatrix/nmatrix.h +92 -84
- data/ext/nmatrix/ruby_constants.cpp +0 -2
- data/ext/nmatrix/ruby_constants.h +0 -2
- data/ext/nmatrix/ruby_nmatrix.c +86 -45
- data/ext/nmatrix/storage/dense/dense.cpp +1 -7
- data/ext/nmatrix/storage/storage.h +0 -1
- data/ext/nmatrix/ttable_helper.rb +0 -6
- data/ext/nmatrix/util/io.cpp +1 -1
- data/lib/nmatrix.rb +1 -19
- data/lib/nmatrix/blas.rb +33 -11
- data/lib/nmatrix/io/market.rb +3 -3
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +382 -131
- data/lib/nmatrix/monkeys.rb +2 -3
- data/lib/nmatrix/nmatrix.rb +166 -13
- data/lib/nmatrix/shortcuts.rb +72 -7
- data/lib/nmatrix/version.rb +2 -2
- data/spec/00_nmatrix_spec.rb +154 -5
- data/spec/02_slice_spec.rb +2 -6
- data/spec/03_nmatrix_monkeys_spec.rb +7 -1
- data/spec/blas_spec.rb +60 -33
- data/spec/homogeneous_spec.rb +10 -10
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/math_spec.rb +436 -52
- data/spec/shortcuts_spec.rb +28 -4
- data/spec/spec_helper.rb +14 -2
- data/spec/utm5940.mtx +83844 -0
- metadata +49 -76
- data/.gitignore +0 -27
- data/.rspec +0 -2
- data/.travis.yml +0 -15
- data/CONTRIBUTING.md +0 -82
- data/Gemfile +0 -2
- data/History.txt +0 -677
- data/LICENSE.txt +0 -23
- data/Manifest.txt +0 -92
- data/README.rdoc +0 -150
- data/Rakefile +0 -216
- data/ext/nmatrix/data/rational.h +0 -440
- data/ext/nmatrix/math/geev.h +0 -82
- data/ext/nmatrix/math/ger.h +0 -96
- data/ext/nmatrix/math/gesdd.h +0 -80
- data/ext/nmatrix/math/gesvd.h +0 -78
- data/ext/nmatrix/math/getf2.h +0 -86
- data/ext/nmatrix/math/getri.h +0 -108
- data/ext/nmatrix/math/potrs.h +0 -129
- data/ext/nmatrix/math/swap.h +0 -52
- data/lib/nmatrix/lapack.rb +0 -240
- data/nmatrix.gemspec +0 -55
- data/scripts/mac-brew-gcc.sh +0 -50
- data/scripts/mac-mavericks-brew-gcc.sh +0 -22
- data/spec/lapack_spec.rb +0 -459
data/ext/nmatrix/data/meta.h
CHANGED
@@ -44,9 +44,6 @@ namespace nm {
|
|
44
44
|
template <> struct ctype_to_dtype_enum<double> { static const nm::dtype_t value_type = nm::FLOAT64; };
|
45
45
|
template <> struct ctype_to_dtype_enum<Complex64> { static const nm::dtype_t value_type = nm::COMPLEX64; };
|
46
46
|
template <> struct ctype_to_dtype_enum<Complex128> { static const nm::dtype_t value_type = nm::COMPLEX128; };
|
47
|
-
template <> struct ctype_to_dtype_enum<Rational32> { static const nm::dtype_t value_type = nm::RATIONAL32; };
|
48
|
-
template <> struct ctype_to_dtype_enum<Rational64> { static const nm::dtype_t value_type = nm::RATIONAL64; };
|
49
|
-
template <> struct ctype_to_dtype_enum<Rational128> { static const nm::dtype_t value_type = nm::RATIONAL128; };
|
50
47
|
template <> struct ctype_to_dtype_enum<RubyObject> { static const nm::dtype_t value_type = nm::RUBYOBJ; };
|
51
48
|
|
52
49
|
|
@@ -60,11 +57,8 @@ namespace nm {
|
|
60
57
|
template <> struct dtype_enum_T<nm::FLOAT64> { typedef double type; };
|
61
58
|
template <> struct dtype_enum_T<nm::COMPLEX64> { typedef nm::Complex64 type; };
|
62
59
|
template <> struct dtype_enum_T<nm::COMPLEX128> { typedef nm::Complex128 type; };
|
63
|
-
template <> struct dtype_enum_T<nm::RATIONAL32> { typedef nm::Rational32 type; };
|
64
|
-
template <> struct dtype_enum_T<nm::RATIONAL64> { typedef nm::Rational64 type; };
|
65
|
-
template <> struct dtype_enum_T<nm::RATIONAL128> { typedef nm::Rational128 type; };
|
66
60
|
template <> struct dtype_enum_T<nm::RUBYOBJ> { typedef nm::RubyObject type; };
|
67
61
|
|
68
62
|
} // end namespace nm
|
69
63
|
|
70
|
-
#endif
|
64
|
+
#endif
|
@@ -45,7 +45,7 @@
|
|
45
45
|
/*
|
46
46
|
* Macros
|
47
47
|
*/
|
48
|
-
#define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX)
|
48
|
+
#define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX))
|
49
49
|
#define NMATRIX_CHECK_TYPE(val) \
|
50
50
|
if (TYPE(val) != T_DATA || (RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) \
|
51
51
|
rb_raise(rb_eTypeError, "Expected NMatrix on left-hand side of operation.");
|
@@ -76,12 +76,6 @@ class RubyObject {
|
|
76
76
|
template <typename FloatType, typename = typename std::enable_if<std::is_floating_point<FloatType>::value>::type>
|
77
77
|
inline RubyObject(const Complex<FloatType>& other) : rval(rb_complex_new(rb_float_new(other.r), rb_float_new(other.i))) {}
|
78
78
|
|
79
|
-
/*
|
80
|
-
* Rational number constructor.
|
81
|
-
*/
|
82
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
83
|
-
inline RubyObject(const Rational<IntType>& other) : rval(rb_rational_new(INT2FIX(other.n), INT2FIX(other.d))) {}
|
84
|
-
|
85
79
|
/*
|
86
80
|
* Integer constructor.
|
87
81
|
*
|
@@ -123,10 +117,6 @@ class RubyObject {
|
|
123
117
|
inline operator double() const { RETURN_OBJ2NUM(NUM2DBL) }
|
124
118
|
inline operator float() const { RETURN_OBJ2NUM(NUM2DBL) }
|
125
119
|
|
126
|
-
inline operator Rational32() const { return this->to<Rational32>(); }
|
127
|
-
inline operator Rational64() const { return this->to<Rational64>(); }
|
128
|
-
inline operator Rational128() const { return this->to<Rational128>(); }
|
129
|
-
|
130
120
|
inline operator Complex64() const { return this->to<Complex64>(); }
|
131
121
|
inline operator Complex128() const { return this->to<Complex128>(); }
|
132
122
|
/*
|
@@ -255,20 +245,6 @@ class RubyObject {
|
|
255
245
|
return *this != RubyObject(other);
|
256
246
|
}
|
257
247
|
*/
|
258
|
-
//////////////////////////////
|
259
|
-
// RUBY-RATIONAL OPERATIONS //
|
260
|
-
//////////////////////////////
|
261
|
-
|
262
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
263
|
-
inline bool operator==(const Rational<IntType>& other) const {
|
264
|
-
return *this == RubyObject(other);
|
265
|
-
}
|
266
|
-
|
267
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
268
|
-
inline bool operator!=(const Rational<IntType>& other) const {
|
269
|
-
return *this != RubyObject(other);
|
270
|
-
}
|
271
|
-
|
272
248
|
//////////////////////////////
|
273
249
|
// RUBY-COMPLEX OPERATIONS //
|
274
250
|
//////////////////////////////
|
@@ -304,7 +280,7 @@ class RubyObject {
|
|
304
280
|
*/
|
305
281
|
template <typename ComplexType>
|
306
282
|
inline typename std::enable_if<made_from_same_template<ComplexType, Complex64>::value, ComplexType>::type to(void) const {
|
307
|
-
if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT
|
283
|
+
if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT) {
|
308
284
|
return ComplexType(NUM2DBL(this->rval));
|
309
285
|
|
310
286
|
} else if (TYPE(this->rval) == T_COMPLEX) {
|
@@ -314,25 +290,8 @@ class RubyObject {
|
|
314
290
|
rb_raise(rb_eTypeError, "Invalid conversion to Complex type.");
|
315
291
|
}
|
316
292
|
}
|
317
|
-
|
318
|
-
/*
|
319
|
-
* Convert a Ruby object to a rational number.
|
320
|
-
*/
|
321
|
-
template <typename RationalType>
|
322
|
-
inline typename std::enable_if<made_from_same_template<RationalType, Rational32>::value, RationalType>::type to(void) const {
|
323
|
-
if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT or TYPE(this->rval) == T_COMPLEX) {
|
324
|
-
return RationalType(NUM2INT(this->rval));
|
325
|
-
|
326
|
-
} else if (TYPE(this->rval) == T_RATIONAL) {
|
327
|
-
return RationalType(NUM2INT(rb_funcall(this->rval, nm_rb_numer, 0)), NUM2INT(rb_funcall(this->rval, nm_rb_denom, 0)));
|
328
|
-
|
329
|
-
} else {
|
330
|
-
rb_raise(rb_eTypeError, "Invalid conversion to Rational type.");
|
331
|
-
}
|
332
|
-
}
|
333
|
-
|
334
293
|
};
|
335
|
-
|
294
|
+
|
336
295
|
// Negative operator
|
337
296
|
inline RubyObject operator-(const RubyObject& rhs) {
|
338
297
|
return RubyObject(rb_funcall(rhs.rval, nm_rb_negate, 0));
|
@@ -413,47 +372,6 @@ inline bool operator>(const Complex<FloatType>& left, const RubyObject& right) {
|
|
413
372
|
return RubyObject(left) > right;
|
414
373
|
}
|
415
374
|
|
416
|
-
|
417
|
-
|
418
|
-
//////////////////////////////
|
419
|
-
// RATIONAL-RUBY OPERATIONS //
|
420
|
-
//////////////////////////////
|
421
|
-
|
422
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
423
|
-
inline bool operator==(const Rational<IntType>& left, const RubyObject& right) {
|
424
|
-
return RubyObject(left) == right;
|
425
|
-
}
|
426
|
-
|
427
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
428
|
-
inline bool operator!=(const Rational<IntType>& left, const RubyObject& right) {
|
429
|
-
return RubyObject(left) != right;
|
430
|
-
}
|
431
|
-
|
432
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
433
|
-
inline bool operator>=(const Rational<IntType>& left, const RubyObject& right) {
|
434
|
-
return RubyObject(left) >= right;
|
435
|
-
}
|
436
|
-
|
437
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
438
|
-
inline bool operator<=(const Rational<IntType>& left, const RubyObject& right) {
|
439
|
-
return RubyObject(left) <= right;
|
440
|
-
}
|
441
|
-
|
442
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
443
|
-
inline bool operator<(const Rational<IntType>& left, const RubyObject& right) {
|
444
|
-
return RubyObject(left) < right;
|
445
|
-
}
|
446
|
-
|
447
|
-
template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
|
448
|
-
inline bool operator>(const Rational<IntType>& left, const RubyObject& right) {
|
449
|
-
return RubyObject(left) > right;
|
450
|
-
}
|
451
|
-
|
452
|
-
inline std::ostream& operator<<(std::ostream& out, const RubyObject& rhs) {
|
453
|
-
out << "RUBYOBJECT" << std::flush; // FIXME: Try calling inspect or something on the Ruby object if we really need to debug it.
|
454
|
-
return out;
|
455
|
-
}
|
456
|
-
|
457
375
|
} // end of namespace nm
|
458
376
|
|
459
377
|
namespace std {
|
data/ext/nmatrix/extconf.rb
CHANGED
@@ -155,83 +155,12 @@ else
|
|
155
155
|
puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
|
156
156
|
end
|
157
157
|
|
158
|
-
# add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
|
159
|
-
|
160
|
-
# The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
|
161
|
-
# --with-atlas-lib or --with-atlas-include and tell the compiler where to look
|
162
|
-
# for ATLAS. The same for all the others
|
163
|
-
#
|
164
|
-
#dir_config("clapack", ["/usr/local/atlas/include"], [])
|
165
|
-
#
|
166
|
-
#
|
167
|
-
|
168
|
-
# Is g++ having trouble finding your header files?
|
169
|
-
# Try this:
|
170
|
-
# export C_INCLUDE_PATH=/usr/local/atlas/include
|
171
|
-
# export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
|
172
|
-
# (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
|
173
|
-
|
174
|
-
idefaults = {lapack: ["/usr/include/atlas"],
|
175
|
-
cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
|
176
|
-
atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
|
177
|
-
|
178
|
-
# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
|
179
|
-
# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
|
180
|
-
# that Dir.exists?(d) for each.
|
181
|
-
ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
|
182
|
-
cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
|
183
|
-
atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
|
184
|
-
|
185
|
-
if have_library("clapack") # Usually only applies for Mac OS X
|
186
|
-
$libs += " -lclapack "
|
187
|
-
end
|
188
|
-
|
189
|
-
unless have_library("lapack")
|
190
|
-
dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
|
191
|
-
end
|
192
|
-
|
193
|
-
unless have_library("cblas")
|
194
|
-
dir_config("cblas", idefaults[:cblas], ldefaults[:cblas])
|
195
|
-
end
|
196
|
-
|
197
|
-
unless have_library("atlas")
|
198
|
-
dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
|
199
|
-
end
|
200
|
-
|
201
|
-
# If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
|
202
|
-
# we try our luck with the default location.
|
203
|
-
if have_header("atlas/cblas.h")
|
204
|
-
have_header("atlas/clapack.h")
|
205
|
-
else
|
206
|
-
have_header("cblas.h")
|
207
|
-
have_header("clapack.h")
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
# Although have_func is supposed to take a list as its second argument, I find that it simply
|
212
|
-
# applies a :to_s to the second arg and doesn't actually check each one. We may want to put
|
213
|
-
# have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and
|
214
|
-
# lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. --@mohawkjohn 8/20/14
|
215
|
-
have_func("clapack_dgetrf", "atlas/clapack.h")
|
216
|
-
have_func("clapack_dgetri", "atlas/clapack.h")
|
217
|
-
have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
|
218
|
-
|
219
|
-
have_func("cblas_dgemm", "cblas.h")
|
220
|
-
|
221
|
-
#have_func("rb_scan_args", "ruby.h")
|
222
|
-
|
223
|
-
#find_library("lapack", "clapack_dgetrf")
|
224
|
-
#find_library("cblas", "cblas_dgemm")
|
225
|
-
#find_library("atlas", "ATL_dgemmNN")
|
226
|
-
# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
|
227
|
-
$libs += " -llapack -lcblas -latlas "
|
228
158
|
#$libs += " -lprofiler "
|
229
159
|
|
230
|
-
|
231
160
|
# For release, these next two should both be changed to -O3.
|
232
|
-
$CFLAGS += " -O3
|
161
|
+
$CFLAGS += " -O3 "
|
233
162
|
#$CFLAGS += " -static -O0 -g "
|
234
|
-
$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD}
|
163
|
+
$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
|
235
164
|
#$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
|
236
165
|
|
237
166
|
CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
|
data/ext/nmatrix/math.cpp
CHANGED
@@ -23,90 +23,103 @@
|
|
23
23
|
//
|
24
24
|
// == math.cpp
|
25
25
|
//
|
26
|
-
// Ruby-exposed
|
26
|
+
// Ruby-exposed CBLAS and LAPACK functions that are available without
|
27
|
+
// an external library.
|
27
28
|
//
|
28
|
-
// === Procedure for adding
|
29
|
+
// === Procedure for adding CBLAS functions to math.cpp/math.h:
|
29
30
|
//
|
30
31
|
// This procedure is written as if for a fictional function with double
|
31
|
-
// version dbacon, which we'll say is from
|
32
|
+
// version dbacon, which we'll say is from CBLAS.
|
32
33
|
//
|
33
34
|
// 1. Write a default templated version which probably returns a boolean.
|
34
35
|
// Call it bacon, and put it in math.h.
|
35
36
|
//
|
36
|
-
// Order will always be row-major, so we don't need to pass that.
|
37
|
-
// CBLAS_TRANSPOSE-type arguments, however, should be passed.
|
38
|
-
//
|
39
|
-
// Otherwise, arguments should look like those in cblas.h or clapack.h:
|
40
|
-
//
|
41
37
|
// template <typename DType>
|
42
38
|
// bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
43
39
|
// rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
|
44
40
|
// }
|
45
41
|
//
|
42
|
+
// Make sure this is in namespace nm::math
|
43
|
+
//
|
46
44
|
// 2. In math.cpp, add a templated inline static version of the function which takes
|
47
|
-
// only void* pointers and uses
|
48
|
-
// proper dtype.
|
45
|
+
// only void* pointers and uses static_cast to convert them to the
|
46
|
+
// proper dtype. This should also be in namespace nm::math
|
49
47
|
//
|
50
48
|
// This function may also need to switch m and n if these arguments are given.
|
51
49
|
//
|
52
50
|
// For an example, see cblas_gemm. This function should do nothing other than cast
|
53
|
-
// appropriately. If
|
51
|
+
// appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
|
54
52
|
// all take void* only, and no other pointers that vary between functions, you can skip
|
55
53
|
// this particular step -- as we can call them directly using a custom function pointer
|
56
54
|
// array (same function signature!).
|
57
55
|
//
|
58
|
-
// This version of the function will be the one exposed through NMatrix::
|
59
|
-
// want it to be as close to the actual
|
56
|
+
// This version of the function will be the one exposed through NMatrix::BLAS. We
|
57
|
+
// want it to be as close to the actual BLAS version of the function as possible,
|
60
58
|
// and with as few checks as possible.
|
61
59
|
//
|
62
60
|
// You will probably need a forward declaration in the extern "C" block.
|
63
61
|
//
|
64
62
|
// Note: In that case, the function you wrote in Step 1 should also take exactly the
|
65
|
-
// same arguments as
|
63
|
+
// same arguments as cblas_xbacon. Otherwise Bad Things will happen.
|
66
64
|
//
|
67
|
-
// 3. In
|
65
|
+
// 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
|
66
|
+
// call nm::math::bacon()) and also
|
67
|
+
// inline specialized versions of bacon for the different BLAS types.
|
68
|
+
// This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
|
69
|
+
// of bacon from whatever external library is available, as well as the internal version
|
70
|
+
// if an external version is not available. These functions will end up in a namsespace
|
71
|
+
// like nm::math::atlas, but don't explicitly put them in a namespace, they will get
|
72
|
+
// put in the appropriate namespace when cblas_templates_core.h is included.
|
68
73
|
//
|
69
|
-
//
|
70
|
-
//
|
74
|
+
// template <typename DType>
|
75
|
+
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
|
76
|
+
// nm::math::bacon(trans, M, N, A, ...);
|
77
|
+
// }
|
71
78
|
//
|
72
79
|
// template <>
|
73
80
|
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
|
74
|
-
//
|
81
|
+
// cblas_sbacon(trans, M, N, A, ...);
|
75
82
|
// return true;
|
76
83
|
// }
|
77
84
|
//
|
78
|
-
// Make sure these functions are in the namespace nm::math.
|
79
|
-
//
|
80
85
|
// Note that you should do everything in your power here to parse any return values
|
81
|
-
//
|
86
|
+
// cblas_sbacon may give you. We're not trying very hard in this example, but you might
|
82
87
|
// look at getrf to see how it might be done.
|
83
88
|
//
|
84
|
-
// 4.
|
89
|
+
// 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
|
90
|
+
// of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
|
91
|
+
// and math_lapacke.cpp
|
85
92
|
//
|
86
|
-
//
|
93
|
+
// 5. Expose the function in nm_math_init_blas(), in math.cpp:
|
87
94
|
//
|
88
|
-
//
|
95
|
+
// rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
|
89
96
|
//
|
90
|
-
//
|
97
|
+
// Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
|
98
|
+
// to the plugin gems.
|
99
|
+
//
|
100
|
+
// Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
|
101
|
+
//
|
102
|
+
// 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
|
91
103
|
// sanity checks we left out in step 2.
|
92
104
|
//
|
93
|
-
//
|
105
|
+
// 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
|
94
106
|
//
|
95
|
-
//
|
107
|
+
// 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
|
96
108
|
// f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
|
97
109
|
// inserts a lot of unnecessary stuff.
|
98
110
|
//
|
99
111
|
// Copy and paste the output into the default templated function you wrote in Step 1.
|
100
112
|
// Fix it so it works as a template instead of just for doubles.
|
101
113
|
//
|
102
|
-
//
|
114
|
+
// Because of step 3, this will automatically also work for the nmatrix-atlas
|
115
|
+
// and nmatrix-lapacke implementations.
|
116
|
+
//
|
117
|
+
// 9. Write tests to confirm that it works for all data types.
|
103
118
|
//
|
104
|
-
//
|
119
|
+
// 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
|
105
120
|
// or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
|
106
121
|
//
|
107
|
-
//
|
108
|
-
|
109
|
-
|
122
|
+
// 11. Pull request!
|
110
123
|
|
111
124
|
/*
|
112
125
|
* Project Includes
|
@@ -117,30 +130,23 @@
|
|
117
130
|
#include <limits>
|
118
131
|
#include <cmath>
|
119
132
|
|
120
|
-
#include "math/
|
133
|
+
#include "math/cblas_enums.h"
|
134
|
+
|
121
135
|
#include "data/data.h"
|
122
|
-
#include "math/gesdd.h"
|
123
|
-
#include "math/gesvd.h"
|
124
|
-
#include "math/geev.h"
|
125
|
-
#include "math/swap.h"
|
126
136
|
#include "math/imax.h"
|
127
137
|
#include "math/scal.h"
|
128
|
-
#include "math/ger.h"
|
129
|
-
#include "math/getf2.h"
|
130
138
|
#include "math/laswp.h"
|
131
139
|
#include "math/trsm.h"
|
132
|
-
#include "math/long_dtype.h" // for gemm.h
|
133
140
|
#include "math/gemm.h"
|
134
141
|
#include "math/gemv.h"
|
135
142
|
#include "math/asum.h"
|
136
143
|
#include "math/nrm2.h"
|
137
144
|
#include "math/getrf.h"
|
138
|
-
#include "math/getri.h"
|
139
145
|
#include "math/getrs.h"
|
140
|
-
#include "math/potrs.h"
|
141
146
|
#include "math/rot.h"
|
142
147
|
#include "math/rotg.h"
|
143
148
|
#include "math/math.h"
|
149
|
+
#include "math/util.h"
|
144
150
|
#include "storage/dense/dense.h"
|
145
151
|
|
146
152
|
#include "nmatrix.h"
|
@@ -151,12 +157,6 @@
|
|
151
157
|
*/
|
152
158
|
|
153
159
|
extern "C" {
|
154
|
-
#if defined HAVE_CLAPACK_H
|
155
|
-
#include <clapack.h>
|
156
|
-
#elif defined HAVE_ATLAS_CLAPACK_H
|
157
|
-
#include <atlas/clapack.h>
|
158
|
-
#endif
|
159
|
-
|
160
160
|
/* BLAS Level 1. */
|
161
161
|
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
|
162
162
|
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
|
@@ -166,34 +166,20 @@ extern "C" {
|
|
166
166
|
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
|
167
167
|
|
168
168
|
/* BLAS Level 2. */
|
169
|
+
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
170
|
+
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
171
|
+
|
169
172
|
/* BLAS Level 3. */
|
170
173
|
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
|
171
174
|
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
|
172
|
-
static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
|
173
|
-
VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
|
174
175
|
static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
175
176
|
VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
176
|
-
static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
|
177
|
-
VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
178
|
-
static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
|
179
|
-
VALUE lda, VALUE beta, VALUE c, VALUE ldc);
|
180
|
-
static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
|
181
|
-
VALUE lda, VALUE beta, VALUE c, VALUE ldc);
|
182
177
|
|
183
178
|
/* LAPACK. */
|
184
179
|
static VALUE nm_has_clapack(VALUE self);
|
185
180
|
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
|
186
|
-
static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
187
181
|
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
|
188
|
-
static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
|
189
|
-
static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
|
190
|
-
static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
191
182
|
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
|
192
|
-
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
|
193
|
-
|
194
|
-
static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
|
195
|
-
static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
|
196
|
-
static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
|
197
183
|
} // end of extern "C" block
|
198
184
|
|
199
185
|
////////////////////
|
@@ -230,6 +216,11 @@ namespace nm {
|
|
230
216
|
}
|
231
217
|
}
|
232
218
|
|
219
|
+
//we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive
|
220
|
+
template <>
|
221
|
+
void det_exact<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
|
222
|
+
rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
|
223
|
+
}
|
233
224
|
|
234
225
|
/*
|
235
226
|
* Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
|
@@ -308,6 +299,80 @@ namespace nm {
|
|
308
299
|
delete[] col_index;
|
309
300
|
}
|
310
301
|
|
302
|
+
/*
|
303
|
+
* Reduce a square matrix to hessenberg form with householder transforms
|
304
|
+
*
|
305
|
+
* == Arguments
|
306
|
+
*
|
307
|
+
* nrows - The number of rows present in matrix a.
|
308
|
+
* a_elements - Elements of the matrix to be reduced in 1D array form.
|
309
|
+
*
|
310
|
+
* == References
|
311
|
+
*
|
312
|
+
* http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
|
313
|
+
* This code has been included by permission of the author.
|
314
|
+
*/
|
315
|
+
template <typename DType>
|
316
|
+
void hessenberg(const int nrows, void* a_elements) {
|
317
|
+
DType* a = reinterpret_cast<DType*>(a_elements);
|
318
|
+
DType* u = new DType[nrows]; // auxillary storage for the chosen vector
|
319
|
+
DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
|
320
|
+
int i, k, col;
|
321
|
+
|
322
|
+
// For each column use a Householder transformation to zero all entries
|
323
|
+
// below the subdiagonal.
|
324
|
+
for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
|
325
|
+
col++) {
|
326
|
+
// Calculate the signed square root of the sum of squares of the
|
327
|
+
// elements below the diagonal.
|
328
|
+
|
329
|
+
for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
|
330
|
+
p_a += nrows, i++) {
|
331
|
+
sum_of_squares += *p_a * *p_a;
|
332
|
+
}
|
333
|
+
if (sum_of_squares == 0.0) { continue; }
|
334
|
+
sum_of_squares = std::sqrt(sum_of_squares);
|
335
|
+
|
336
|
+
if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }
|
337
|
+
|
338
|
+
// Calculate the Householder transformation Q = I - 2uu'/u'u.
|
339
|
+
u[col + 1] = *psubdiag - sum_of_squares;
|
340
|
+
*psubdiag = sum_of_squares;
|
341
|
+
|
342
|
+
for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
|
343
|
+
u[i] = *p_a;
|
344
|
+
*p_a = 0.0;
|
345
|
+
}
|
346
|
+
|
347
|
+
// Premultiply A by Q
|
348
|
+
scale = -1.0 / (sum_of_squares * u[col+1]);
|
349
|
+
for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
|
350
|
+
p_a = a + nrows * (col + 1) + i;
|
351
|
+
for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
|
352
|
+
innerproduct += u[k] * *p_a;
|
353
|
+
}
|
354
|
+
innerproduct *= scale;
|
355
|
+
for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
|
356
|
+
*p_a -= u[k] * innerproduct;
|
357
|
+
}
|
358
|
+
}
|
359
|
+
|
360
|
+
// Postmultiply QA by Q
|
361
|
+
for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
|
362
|
+
for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
|
363
|
+
innerproduct += u[k] * *(p_row + k);
|
364
|
+
}
|
365
|
+
innerproduct *= scale;
|
366
|
+
|
367
|
+
for (k = col + 1; k < nrows; k++) {
|
368
|
+
*(p_row + k) -= u[k] * innerproduct;
|
369
|
+
}
|
370
|
+
}
|
371
|
+
}
|
372
|
+
|
373
|
+
delete[] u;
|
374
|
+
}
|
375
|
+
|
311
376
|
/*
|
312
377
|
* Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
|
313
378
|
*/
|
@@ -318,6 +383,10 @@ namespace nm {
|
|
318
383
|
|
319
384
|
if (M == 2) {
|
320
385
|
DType det = A[0] * A[lda+1] - A[1] * A[lda];
|
386
|
+
if (det == 0) {
|
387
|
+
rb_raise(nm_eNotInvertibleError,
|
388
|
+
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
|
389
|
+
}
|
321
390
|
B[0] = A[lda+1] / det;
|
322
391
|
B[1] = -A[1] / det;
|
323
392
|
B[ldb] = -A[lda] / det;
|
@@ -328,7 +397,8 @@ namespace nm {
|
|
328
397
|
DType det;
|
329
398
|
det_exact<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
|
330
399
|
if (det == 0) {
|
331
|
-
rb_raise(nm_eNotInvertibleError,
|
400
|
+
rb_raise(nm_eNotInvertibleError,
|
401
|
+
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
|
332
402
|
}
|
333
403
|
|
334
404
|
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
|
@@ -347,22 +417,6 @@ namespace nm {
|
|
347
417
|
}
|
348
418
|
}
|
349
419
|
|
350
|
-
/*
|
351
|
-
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
352
|
-
*/
|
353
|
-
template <typename DType, typename CType>
|
354
|
-
inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
|
355
|
-
return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
|
356
|
-
}
|
357
|
-
|
358
|
-
/*
|
359
|
-
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
360
|
-
*/
|
361
|
-
template <typename DType, typename CType>
|
362
|
-
inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
|
363
|
-
return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
|
364
|
-
}
|
365
|
-
|
366
420
|
/*
|
367
421
|
* Function signature conversion for calling CBLAS' gemm functions as directly as possible.
|
368
422
|
*
|
@@ -422,39 +476,6 @@ namespace nm {
|
|
422
476
|
reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
|
423
477
|
}
|
424
478
|
|
425
|
-
|
426
|
-
/*
|
427
|
-
* Function signature conversion for calling CBLAS' trmm functions as directly as possible.
|
428
|
-
*
|
429
|
-
* For documentation: http://www.netlib.org/blas/dtrmm.f
|
430
|
-
*/
|
431
|
-
template <typename DType>
|
432
|
-
inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
433
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
|
434
|
-
const void* A, const int lda, void* B, const int ldb)
|
435
|
-
{
|
436
|
-
trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
|
437
|
-
reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
|
438
|
-
}
|
439
|
-
|
440
|
-
|
441
|
-
/*
|
442
|
-
* Function signature conversion for calling CBLAS' syrk functions as directly as possible.
|
443
|
-
*
|
444
|
-
* For documentation: http://www.netlib.org/blas/dsyrk.f
|
445
|
-
*/
|
446
|
-
template <typename DType>
|
447
|
-
inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
|
448
|
-
const int n, const int k, const void* alpha,
|
449
|
-
const void* A, const int lda, const void* beta, void* C, const int ldc)
|
450
|
-
{
|
451
|
-
syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
|
452
|
-
reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
|
453
|
-
}
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
479
|
}
|
459
480
|
} // end of namespace nm::math
|
460
481
|
|
@@ -466,80 +487,29 @@ extern "C" {
|
|
466
487
|
///////////////////
|
467
488
|
|
468
489
|
void nm_math_init_blas() {
|
469
|
-
|
490
|
+
VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");
|
470
491
|
|
471
492
|
rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);
|
472
493
|
|
473
|
-
|
474
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
475
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
|
476
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
477
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
|
478
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
|
479
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
|
480
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
|
481
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
|
482
|
-
|
483
|
-
/* Non-ATLAS regular LAPACK Functions called via Fortran interface */
|
484
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
|
485
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
|
486
|
-
rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
|
487
|
-
|
488
|
-
cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
|
489
|
-
|
490
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
|
491
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
|
492
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
|
493
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
|
494
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
|
495
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
|
496
|
-
|
497
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
|
498
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
|
499
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
|
500
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
|
501
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
|
502
|
-
rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
|
503
|
-
}
|
504
|
-
|
505
|
-
/*
|
506
|
-
* Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
|
507
|
-
*
|
508
|
-
* Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
|
509
|
-
*/
|
510
|
-
static inline char lapack_svd_job_sym(VALUE op) {
|
511
|
-
if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
|
512
|
-
else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
|
513
|
-
else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
|
514
|
-
else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
|
515
|
-
else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
|
516
|
-
return 'a';
|
517
|
-
}
|
494
|
+
VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");
|
518
495
|
|
496
|
+
/* ATLAS-CLAPACK Functions that are implemented internally */
|
497
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
|
498
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
|
499
|
+
rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
|
519
500
|
|
520
|
-
|
521
|
-
* Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
|
522
|
-
*
|
523
|
-
* Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
|
524
|
-
*/
|
525
|
-
static inline char lapack_evd_job_sym(VALUE op) {
|
526
|
-
if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
|
527
|
-
else return 'V';
|
528
|
-
}
|
501
|
+
VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");
|
529
502
|
|
503
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
|
504
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
|
505
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
|
506
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
|
507
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
|
508
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
|
530
509
|
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
* Called by nm_cblas_gemm -- basically inline.
|
535
|
-
*
|
536
|
-
*/
|
537
|
-
static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
|
538
|
-
if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
|
539
|
-
else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
|
540
|
-
else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
|
541
|
-
else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
|
542
|
-
return CblasNoTrans;
|
510
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
|
511
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
|
512
|
+
rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
|
543
513
|
}
|
544
514
|
|
545
515
|
/*
|
@@ -572,55 +542,6 @@ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE
|
|
572
542
|
return vector;
|
573
543
|
}
|
574
544
|
|
575
|
-
/*
|
576
|
-
* Interprets cblas argument which could be :left or :right
|
577
|
-
*
|
578
|
-
* Called by nm_cblas_trsm -- basically inline
|
579
|
-
*/
|
580
|
-
static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
|
581
|
-
ID op_id = rb_to_id(op);
|
582
|
-
if (op_id == nm_rb_left) return CblasLeft;
|
583
|
-
if (op_id == nm_rb_right) return CblasRight;
|
584
|
-
rb_raise(rb_eArgError, "Expected :left or :right for side argument");
|
585
|
-
return CblasLeft;
|
586
|
-
}
|
587
|
-
|
588
|
-
/*
|
589
|
-
* Interprets cblas argument which could be :upper or :lower
|
590
|
-
*
|
591
|
-
* Called by nm_cblas_trsm -- basically inline
|
592
|
-
*/
|
593
|
-
static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
|
594
|
-
ID op_id = rb_to_id(op);
|
595
|
-
if (op_id == nm_rb_upper) return CblasUpper;
|
596
|
-
if (op_id == nm_rb_lower) return CblasLower;
|
597
|
-
rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
|
598
|
-
return CblasUpper;
|
599
|
-
}
|
600
|
-
|
601
|
-
|
602
|
-
/*
|
603
|
-
* Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
|
604
|
-
*
|
605
|
-
* Called by nm_cblas_trsm -- basically inline
|
606
|
-
*/
|
607
|
-
static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
|
608
|
-
if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
|
609
|
-
return CblasNonUnit;
|
610
|
-
}
|
611
|
-
|
612
|
-
/*
|
613
|
-
* Interprets cblas argument which could be :row or :col
|
614
|
-
*/
|
615
|
-
static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
616
|
-
if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
|
617
|
-
else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
|
618
|
-
rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
|
619
|
-
rb_raise(rb_eArgError, "Expected :row or :col for order argument");
|
620
|
-
return CblasRowMajor;
|
621
|
-
}
|
622
|
-
|
623
|
-
|
624
545
|
/*
|
625
546
|
* Call any of the cblas_xrotg functions as directly as possible.
|
626
547
|
*
|
@@ -641,10 +562,6 @@ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
|
|
641
562
|
* The outputs [c,s] will be returned in a Ruby Array at the end; the input
|
642
563
|
* NMatrix will also be modified in-place.
|
643
564
|
*
|
644
|
-
* If you provide rationals, be aware that there's a high probability of an
|
645
|
-
* error, since rotg includes a square root -- and most rationals' square roots
|
646
|
-
* are irrational. You're better off converting to Float first.
|
647
|
-
*
|
648
565
|
* This function, like the other cblas_ functions, does minimal type-checking.
|
649
566
|
*/
|
650
567
|
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
@@ -654,14 +571,13 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
|
654
571
|
nm::math::cblas_rotg<double>,
|
655
572
|
nm::math::cblas_rotg<nm::Complex64>,
|
656
573
|
nm::math::cblas_rotg<nm::Complex128>,
|
657
|
-
NULL, NULL, NULL, // no rationals
|
658
574
|
NULL //nm::math::cblas_rotg<nm::RubyObject>
|
659
575
|
};
|
660
576
|
|
661
577
|
nm::dtype_t dtype = NM_DTYPE(ab);
|
662
578
|
|
663
579
|
if (!ttable[dtype]) {
|
664
|
-
rb_raise(nm_eDataTypeError, "this operation undefined for integer
|
580
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
665
581
|
return Qnil;
|
666
582
|
|
667
583
|
} else {
|
@@ -723,9 +639,6 @@ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VAL
|
|
723
639
|
nm::math::cblas_rot<double,double>,
|
724
640
|
nm::math::cblas_rot<nm::Complex64,float>,
|
725
641
|
nm::math::cblas_rot<nm::Complex128,double>,
|
726
|
-
nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
|
727
|
-
nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
|
728
|
-
nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
|
729
642
|
nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
|
730
643
|
};
|
731
644
|
|
@@ -792,17 +705,13 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
792
705
|
nm::math::cblas_nrm2<float64_t,float64_t>,
|
793
706
|
nm::math::cblas_nrm2<float32_t,nm::Complex64>,
|
794
707
|
nm::math::cblas_nrm2<float64_t,nm::Complex128>,
|
795
|
-
//nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
|
796
|
-
//nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
|
797
|
-
//nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
|
798
|
-
NULL, NULL, NULL,
|
799
708
|
nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
|
800
709
|
};
|
801
710
|
|
802
711
|
nm::dtype_t dtype = NM_DTYPE(x);
|
803
712
|
|
804
713
|
if (!ttable[dtype]) {
|
805
|
-
rb_raise(nm_eDataTypeError, "this operation undefined for integer
|
714
|
+
rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
|
806
715
|
return Qnil;
|
807
716
|
|
808
717
|
} else {
|
@@ -850,9 +759,6 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
850
759
|
nm::math::cblas_asum<float64_t,float64_t>,
|
851
760
|
nm::math::cblas_asum<float32_t,nm::Complex64>,
|
852
761
|
nm::math::cblas_asum<float64_t,nm::Complex128>,
|
853
|
-
nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
|
854
|
-
nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
|
855
|
-
nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
|
856
762
|
nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
|
857
763
|
};
|
858
764
|
|
@@ -1000,10 +906,8 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
1000
906
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1001
907
|
nm::math::cblas_trsm<float>,
|
1002
908
|
nm::math::cblas_trsm<double>,
|
1003
|
-
|
1004
|
-
nm::math::cblas_trsm<nm::
|
1005
|
-
nm::math::cblas_trsm<nm::Rational64>,
|
1006
|
-
nm::math::cblas_trsm<nm::Rational128>,
|
909
|
+
nm::math::cblas_trsm<nm::Complex64>,
|
910
|
+
nm::math::cblas_trsm<nm::Complex128>,
|
1007
911
|
nm::math::cblas_trsm<nm::RubyObject>
|
1008
912
|
};
|
1009
913
|
|
@@ -1021,358 +925,6 @@ static VALUE nm_cblas_trsm(VALUE self,
|
|
1021
925
|
return Qtrue;
|
1022
926
|
}
|
1023
927
|
|
1024
|
-
|
1025
|
-
static VALUE nm_cblas_trmm(VALUE self,
|
1026
|
-
VALUE order,
|
1027
|
-
VALUE side, VALUE uplo,
|
1028
|
-
VALUE trans_a, VALUE diag,
|
1029
|
-
VALUE m, VALUE n,
|
1030
|
-
VALUE alpha,
|
1031
|
-
VALUE a, VALUE lda,
|
1032
|
-
VALUE b, VALUE ldb)
|
1033
|
-
{
|
1034
|
-
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
|
1035
|
-
const enum CBLAS_SIDE, const enum CBLAS_UPLO,
|
1036
|
-
const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
|
1037
|
-
const int m, const int n, const void* alpha, const void* a,
|
1038
|
-
const int lda, void* b, const int ldb) = {
|
1039
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1040
|
-
nm::math::cblas_trmm<float>,
|
1041
|
-
nm::math::cblas_trmm<double>,
|
1042
|
-
cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
|
1043
|
-
/*
|
1044
|
-
nm::math::cblas_trmm<nm::Rational32>,
|
1045
|
-
nm::math::cblas_trmm<nm::Rational64>,
|
1046
|
-
nm::math::cblas_trmm<nm::Rational128>,
|
1047
|
-
nm::math::cblas_trmm<nm::RubyObject>*/
|
1048
|
-
};
|
1049
|
-
|
1050
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1051
|
-
|
1052
|
-
if (!ttable[dtype]) {
|
1053
|
-
rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
|
1054
|
-
} else {
|
1055
|
-
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
1056
|
-
rubyval_to_cval(alpha, dtype, pAlpha);
|
1057
|
-
|
1058
|
-
ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
1059
|
-
}
|
1060
|
-
|
1061
|
-
return b;
|
1062
|
-
}
|
1063
|
-
|
1064
|
-
|
1065
|
-
static VALUE nm_cblas_syrk(VALUE self,
|
1066
|
-
VALUE order,
|
1067
|
-
VALUE uplo,
|
1068
|
-
VALUE trans,
|
1069
|
-
VALUE n, VALUE k,
|
1070
|
-
VALUE alpha,
|
1071
|
-
VALUE a, VALUE lda,
|
1072
|
-
VALUE beta,
|
1073
|
-
VALUE c, VALUE ldc)
|
1074
|
-
{
|
1075
|
-
static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
|
1076
|
-
const int n, const int k, const void* alpha, const void* a,
|
1077
|
-
const int lda, const void* beta, void* c, const int ldc) = {
|
1078
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1079
|
-
nm::math::cblas_syrk<float>,
|
1080
|
-
nm::math::cblas_syrk<double>,
|
1081
|
-
cblas_csyrk, cblas_zsyrk// call directly, same function signature!
|
1082
|
-
/*nm::math::cblas_trsm<nm::Rational32>,
|
1083
|
-
nm::math::cblas_trsm<nm::Rational64>,
|
1084
|
-
nm::math::cblas_trsm<nm::Rational128>,
|
1085
|
-
nm::math::cblas_trsm<nm::RubyObject>*/
|
1086
|
-
};
|
1087
|
-
|
1088
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1089
|
-
|
1090
|
-
if (!ttable[dtype]) {
|
1091
|
-
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1092
|
-
} else {
|
1093
|
-
void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
|
1094
|
-
*pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
|
1095
|
-
rubyval_to_cval(alpha, dtype, pAlpha);
|
1096
|
-
rubyval_to_cval(beta, dtype, pBeta);
|
1097
|
-
|
1098
|
-
ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
1099
|
-
}
|
1100
|
-
|
1101
|
-
return Qtrue;
|
1102
|
-
}
|
1103
|
-
|
1104
|
-
|
1105
|
-
static VALUE nm_cblas_herk(VALUE self,
|
1106
|
-
VALUE order,
|
1107
|
-
VALUE uplo,
|
1108
|
-
VALUE trans,
|
1109
|
-
VALUE n, VALUE k,
|
1110
|
-
VALUE alpha,
|
1111
|
-
VALUE a, VALUE lda,
|
1112
|
-
VALUE beta,
|
1113
|
-
VALUE c, VALUE ldc)
|
1114
|
-
{
|
1115
|
-
|
1116
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1117
|
-
|
1118
|
-
if (dtype == nm::COMPLEX64) {
|
1119
|
-
cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
1120
|
-
} else if (dtype == nm::COMPLEX128) {
|
1121
|
-
cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
|
1122
|
-
} else
|
1123
|
-
rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
|
1124
|
-
return Qtrue;
|
1125
|
-
}
|
1126
|
-
|
1127
|
-
|
1128
|
-
/*
|
1129
|
-
* Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
|
1130
|
-
*
|
1131
|
-
* xGESVD computes the singular value decomposition (SVD) of a real
|
1132
|
-
* M-by-N matrix A, optionally computing the left and/or right singular
|
1133
|
-
* vectors. The SVD is written
|
1134
|
-
*
|
1135
|
-
* A = U * SIGMA * transpose(V)
|
1136
|
-
*
|
1137
|
-
* where SIGMA is an M-by-N matrix which is zero except for its
|
1138
|
-
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
|
1139
|
-
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
|
1140
|
-
* are the singular values of A; they are real and non-negative, and
|
1141
|
-
* are returned in descending order. The first min(m,n) columns of
|
1142
|
-
* U and V are the left and right singular vectors of A.
|
1143
|
-
*
|
1144
|
-
* Note that the routine returns V**T, not V.
|
1145
|
-
*/
|
1146
|
-
static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
|
1147
|
-
static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
|
1148
|
-
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1149
|
-
nm::math::lapack_gesvd<float,float>,
|
1150
|
-
nm::math::lapack_gesvd<double,double>,
|
1151
|
-
nm::math::lapack_gesvd<nm::Complex64,float>,
|
1152
|
-
nm::math::lapack_gesvd<nm::Complex128,double>,
|
1153
|
-
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1154
|
-
};
|
1155
|
-
|
1156
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1157
|
-
|
1158
|
-
|
1159
|
-
if (!gesvd_table[dtype]) {
|
1160
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1161
|
-
return Qfalse;
|
1162
|
-
} else {
|
1163
|
-
int M = FIX2INT(m),
|
1164
|
-
N = FIX2INT(n);
|
1165
|
-
|
1166
|
-
int min_mn = NM_MIN(M,N);
|
1167
|
-
int max_mn = NM_MAX(M,N);
|
1168
|
-
|
1169
|
-
char JOBU = lapack_svd_job_sym(jobu),
|
1170
|
-
JOBVT = lapack_svd_job_sym(jobvt);
|
1171
|
-
|
1172
|
-
// only need rwork for complex matrices
|
1173
|
-
int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
|
1174
|
-
void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
|
1175
|
-
int work_size = FIX2INT(lwork);
|
1176
|
-
|
1177
|
-
// ignore user argument for lwork if it's too small.
|
1178
|
-
work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
|
1179
|
-
void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
|
1180
|
-
|
1181
|
-
int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1182
|
-
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
|
1183
|
-
work, work_size, rwork);
|
1184
|
-
return INT2FIX(info);
|
1185
|
-
}
|
1186
|
-
}
|
1187
|
-
|
1188
|
-
/*
|
1189
|
-
* Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
|
1190
|
-
*
|
1191
|
-
* xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
|
1192
|
-
* M-by-N matrix A, optionally computing the left and/or right singular
|
1193
|
-
* vectors. The SVD is written
|
1194
|
-
*
|
1195
|
-
* A = U * SIGMA * transpose(V)
|
1196
|
-
*
|
1197
|
-
* where SIGMA is an M-by-N matrix which is zero except for its
|
1198
|
-
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
|
1199
|
-
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
|
1200
|
-
* are the singular values of A; they are real and non-negative, and
|
1201
|
-
* are returned in descending order. The first min(m,n) columns of
|
1202
|
-
* U and V are the left and right singular vectors of A.
|
1203
|
-
*
|
1204
|
-
* Note that the routine returns V**T, not V.
|
1205
|
-
*/
|
1206
|
-
static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
|
1207
|
-
static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
|
1208
|
-
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1209
|
-
nm::math::lapack_gesdd<float,float>,
|
1210
|
-
nm::math::lapack_gesdd<double,double>,
|
1211
|
-
nm::math::lapack_gesdd<nm::Complex64,float>,
|
1212
|
-
nm::math::lapack_gesdd<nm::Complex128,double>,
|
1213
|
-
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1214
|
-
};
|
1215
|
-
|
1216
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1217
|
-
|
1218
|
-
if (!gesdd_table[dtype]) {
|
1219
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1220
|
-
return Qfalse;
|
1221
|
-
} else {
|
1222
|
-
int M = FIX2INT(m),
|
1223
|
-
N = FIX2INT(n);
|
1224
|
-
|
1225
|
-
int min_mn = NM_MIN(M,N);
|
1226
|
-
int max_mn = NM_MAX(M,N);
|
1227
|
-
|
1228
|
-
char JOBZ = lapack_svd_job_sym(jobz);
|
1229
|
-
|
1230
|
-
// only need rwork for complex matrices
|
1231
|
-
void* rwork = NULL;
|
1232
|
-
|
1233
|
-
int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
|
1234
|
-
if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
|
1235
|
-
int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
|
1236
|
-
rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
|
1237
|
-
|
1238
|
-
if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
|
1239
|
-
else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
|
1240
|
-
else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
|
1241
|
-
} else {
|
1242
|
-
if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
|
1243
|
-
else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
|
1244
|
-
else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
|
1245
|
-
}
|
1246
|
-
void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
|
1247
|
-
int* iwork = NM_ALLOCA_N(int, 8*min_mn);
|
1248
|
-
|
1249
|
-
int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1250
|
-
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
|
1251
|
-
work, work_size, iwork, rwork);
|
1252
|
-
return INT2FIX(info);
|
1253
|
-
}
|
1254
|
-
}
|
1255
|
-
|
1256
|
-
|
1257
|
-
/*
|
1258
|
-
* Function signature conversion for calling CBLAS' geev functions as directly as possible.
|
1259
|
-
*
|
1260
|
-
* GEEV computes for an N-by-N real nonsymmetric matrix A, the
|
1261
|
-
* eigenvalues and, optionally, the left and/or right eigenvectors.
|
1262
|
-
*
|
1263
|
-
* The right eigenvector v(j) of A satisfies
|
1264
|
-
* A * v(j) = lambda(j) * v(j)
|
1265
|
-
* where lambda(j) is its eigenvalue.
|
1266
|
-
*
|
1267
|
-
* The left eigenvector u(j) of A satisfies
|
1268
|
-
* u(j)**H * A = lambda(j) * u(j)**H
|
1269
|
-
* where u(j)**H denotes the conjugate transpose of u(j).
|
1270
|
-
*
|
1271
|
-
* The computed eigenvectors are normalized to have Euclidean norm
|
1272
|
-
* equal to 1 and largest component real.
|
1273
|
-
*/
|
1274
|
-
static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
|
1275
|
-
static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
|
1276
|
-
NULL, NULL, NULL, NULL, NULL, // no integer ops
|
1277
|
-
nm::math::lapack_geev<float,float>,
|
1278
|
-
nm::math::lapack_geev<double,double>,
|
1279
|
-
nm::math::lapack_geev<nm::Complex64,float>,
|
1280
|
-
nm::math::lapack_geev<nm::Complex128,double>,
|
1281
|
-
NULL, NULL, NULL, NULL // no rationals or Ruby objects
|
1282
|
-
};
|
1283
|
-
|
1284
|
-
nm::dtype_t dtype = NM_DTYPE(a);
|
1285
|
-
|
1286
|
-
|
1287
|
-
if (!geev_table[dtype]) {
|
1288
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1289
|
-
return Qfalse;
|
1290
|
-
} else {
|
1291
|
-
int N = FIX2INT(n);
|
1292
|
-
|
1293
|
-
char JOBVL = lapack_evd_job_sym(compute_left),
|
1294
|
-
JOBVR = lapack_evd_job_sym(compute_right);
|
1295
|
-
|
1296
|
-
void* A = NM_STORAGE_DENSE(a)->elements;
|
1297
|
-
void* WR = NM_STORAGE_DENSE(w)->elements;
|
1298
|
-
void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
|
1299
|
-
void* VL = NM_STORAGE_DENSE(vl)->elements;
|
1300
|
-
void* VR = NM_STORAGE_DENSE(vr)->elements;
|
1301
|
-
|
1302
|
-
// only need rwork for complex matrices (wi == Qnil for complex)
|
1303
|
-
int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
|
1304
|
-
void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
|
1305
|
-
int work_size = FIX2INT(lwork);
|
1306
|
-
void* work;
|
1307
|
-
|
1308
|
-
int info;
|
1309
|
-
|
1310
|
-
// if work size is 0 or -1, query.
|
1311
|
-
if (work_size <= 0) {
|
1312
|
-
work_size = -1;
|
1313
|
-
work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
|
1314
|
-
info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
|
1315
|
-
work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
|
1316
|
-
// line above is basically: work_size = (int)(work[0]); // now have new work_size
|
1317
|
-
NM_FREE(work);
|
1318
|
-
if (info == 0)
|
1319
|
-
rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
|
1320
|
-
else return INT2FIX(info); // error of some kind on query!
|
1321
|
-
}
|
1322
|
-
|
1323
|
-
// if work size is < 2*N, just set it to 2*N
|
1324
|
-
if (work_size < 2*N) work_size = 2*N;
|
1325
|
-
if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
|
1326
|
-
work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
|
1327
|
-
}
|
1328
|
-
|
1329
|
-
// Allocate work array for actual run
|
1330
|
-
work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
|
1331
|
-
|
1332
|
-
// Perform the actual calculation.
|
1333
|
-
info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
|
1334
|
-
|
1335
|
-
return INT2FIX(info);
|
1336
|
-
}
|
1337
|
-
}
|
1338
|
-
|
1339
|
-
|
1340
|
-
static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1341
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1342
|
-
/*nm::math::clapack_lauum<uint8_t, false>,
|
1343
|
-
nm::math::clapack_lauum<int8_t, false>,
|
1344
|
-
nm::math::clapack_lauum<int16_t, false>,
|
1345
|
-
nm::math::clapack_lauum<uint32_t, false>,
|
1346
|
-
nm::math::clapack_lauum<uint64_t, false>,*/
|
1347
|
-
NULL, NULL, NULL, NULL, NULL,
|
1348
|
-
nm::math::clapack_lauum<false, float>,
|
1349
|
-
nm::math::clapack_lauum<false, double>,
|
1350
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1351
|
-
clapack_clauum, clapack_zlauum, // call directly, same function signature!
|
1352
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1353
|
-
nm::math::clapack_lauum<true, nm::Complex64>,
|
1354
|
-
nm::math::clapack_lauum<true, nm::Complex128>,
|
1355
|
-
#endif
|
1356
|
-
/*
|
1357
|
-
nm::math::clapack_lauum<nm::Rational32, false>,
|
1358
|
-
nm::math::clapack_lauum<nm::Rational64, false>,
|
1359
|
-
nm::math::clapack_lauum<nm::Rational128, false>,
|
1360
|
-
nm::math::clapack_lauum<nm::RubyObject, false>
|
1361
|
-
|
1362
|
-
*/
|
1363
|
-
};
|
1364
|
-
|
1365
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1366
|
-
rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
|
1367
|
-
} else {
|
1368
|
-
// Call either our version of lauum or the LAPACK version.
|
1369
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1370
|
-
}
|
1371
|
-
|
1372
|
-
return a;
|
1373
|
-
}
|
1374
|
-
|
1375
|
-
|
1376
928
|
/* Call any of the clapack_xgetrf functions as directly as possible.
|
1377
929
|
*
|
1378
930
|
* The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
|
@@ -1402,15 +954,8 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
|
|
1402
954
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1403
955
|
nm::math::clapack_getrf<float>,
|
1404
956
|
nm::math::clapack_getrf<double>,
|
1405
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1406
|
-
clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
|
1407
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1408
957
|
nm::math::clapack_getrf<nm::Complex64>,
|
1409
958
|
nm::math::clapack_getrf<nm::Complex128>,
|
1410
|
-
#endif
|
1411
|
-
nm::math::clapack_getrf<nm::Rational32>,
|
1412
|
-
nm::math::clapack_getrf<nm::Rational64>,
|
1413
|
-
nm::math::clapack_getrf<nm::Rational128>,
|
1414
959
|
nm::math::clapack_getrf<nm::RubyObject>
|
1415
960
|
};
|
1416
961
|
|
@@ -1438,51 +983,6 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
|
|
1438
983
|
}
|
1439
984
|
|
1440
985
|
|
1441
|
-
/* Call any of the clapack_xpotrf functions as directly as possible.
|
1442
|
-
*
|
1443
|
-
* You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
|
1444
|
-
* with its arguments?
|
1445
|
-
*
|
1446
|
-
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1447
|
-
* handling, so you can easily crash Ruby!
|
1448
|
-
*
|
1449
|
-
* Returns an array giving the pivot indices (normally these are argument #5).
|
1450
|
-
*/
|
1451
|
-
static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1452
|
-
#if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
|
1453
|
-
rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
|
1454
|
-
#endif
|
1455
|
-
|
1456
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1457
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1458
|
-
nm::math::clapack_potrf<float>,
|
1459
|
-
nm::math::clapack_potrf<double>,
|
1460
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1461
|
-
clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
|
1462
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1463
|
-
nm::math::clapack_potrf<nm::Complex64>,
|
1464
|
-
nm::math::clapack_potrf<nm::Complex128>,
|
1465
|
-
#endif
|
1466
|
-
NULL, NULL, NULL, NULL /*
|
1467
|
-
nm::math::clapack_potrf<nm::Rational32>,
|
1468
|
-
nm::math::clapack_potrf<nm::Rational64>,
|
1469
|
-
nm::math::clapack_potrf<nm::Rational128>,
|
1470
|
-
nm::math::clapack_potrf<nm::RubyObject> */
|
1471
|
-
};
|
1472
|
-
|
1473
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1474
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1475
|
-
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
1476
|
-
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1477
|
-
} else {
|
1478
|
-
// Call either our version of potrf or the LAPACK version.
|
1479
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1480
|
-
}
|
1481
|
-
|
1482
|
-
return a;
|
1483
|
-
}
|
1484
|
-
|
1485
|
-
|
1486
986
|
/*
|
1487
987
|
* Call any of the clapack_xgetrs functions as directly as possible.
|
1488
988
|
*/
|
@@ -1493,15 +993,8 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
|
|
1493
993
|
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1494
994
|
nm::math::clapack_getrs<float>,
|
1495
995
|
nm::math::clapack_getrs<double>,
|
1496
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1497
|
-
clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
|
1498
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1499
996
|
nm::math::clapack_getrs<nm::Complex64>,
|
1500
997
|
nm::math::clapack_getrs<nm::Complex128>,
|
1501
|
-
#endif
|
1502
|
-
nm::math::clapack_getrs<nm::Rational32>,
|
1503
|
-
nm::math::clapack_getrs<nm::Rational64>,
|
1504
|
-
nm::math::clapack_getrs<nm::Rational128>,
|
1505
998
|
nm::math::clapack_getrs<nm::RubyObject>
|
1506
999
|
};
|
1507
1000
|
|
@@ -1529,157 +1022,14 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
|
|
1529
1022
|
return b;
|
1530
1023
|
}
|
1531
1024
|
|
1532
|
-
|
1533
|
-
/*
|
1534
|
-
* Call any of the clapack_xpotrs functions as directly as possible.
|
1535
|
-
*/
|
1536
|
-
static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
|
1537
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
|
1538
|
-
const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
|
1539
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1540
|
-
nm::math::clapack_potrs<float,false>,
|
1541
|
-
nm::math::clapack_potrs<double,false>,
|
1542
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1543
|
-
clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
|
1544
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1545
|
-
nm::math::clapack_potrs<nm::Complex64,true>,
|
1546
|
-
nm::math::clapack_potrs<nm::Complex128,true>,
|
1547
|
-
#endif
|
1548
|
-
nm::math::clapack_potrs<nm::Rational32,false>,
|
1549
|
-
nm::math::clapack_potrs<nm::Rational64,false>,
|
1550
|
-
nm::math::clapack_potrs<nm::Rational128,false>,
|
1551
|
-
nm::math::clapack_potrs<nm::RubyObject,false>
|
1552
|
-
};
|
1553
|
-
|
1554
|
-
|
1555
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1556
|
-
rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1557
|
-
} else {
|
1558
|
-
|
1559
|
-
// Call either our version of potrs or the LAPACK version.
|
1560
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
|
1561
|
-
NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
|
1562
|
-
}
|
1563
|
-
|
1564
|
-
// b is both returned and modified directly in the argument list.
|
1565
|
-
return b;
|
1566
|
-
}
|
1567
|
-
|
1568
|
-
|
1569
1025
|
/*
|
1570
1026
|
* Simple way to check from within Ruby code if clapack functions are available, without
|
1571
1027
|
* having to wait around for an exception to be thrown.
|
1572
1028
|
*/
|
1573
1029
|
static VALUE nm_has_clapack(VALUE self) {
|
1574
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1575
1030
|
return Qfalse;
|
1576
|
-
#else
|
1577
|
-
return Qtrue;
|
1578
|
-
#endif
|
1579
1031
|
}
|
1580
1032
|
|
1581
|
-
|
1582
|
-
/* Call any of the clapack_xgetri functions as directly as possible.
|
1583
|
-
*
|
1584
|
-
* You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
|
1585
|
-
* with its arguments?
|
1586
|
-
*
|
1587
|
-
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1588
|
-
* handling, so you can easily crash Ruby!
|
1589
|
-
*
|
1590
|
-
* Returns an array giving the pivot indices (normally these are argument #5).
|
1591
|
-
*/
|
1592
|
-
static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
|
1593
|
-
#if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
|
1594
|
-
rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
|
1595
|
-
#endif
|
1596
|
-
|
1597
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
|
1598
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1599
|
-
nm::math::clapack_getri<float>,
|
1600
|
-
nm::math::clapack_getri<double>,
|
1601
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1602
|
-
clapack_cgetri, clapack_zgetri, // call directly, same function signature!
|
1603
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1604
|
-
nm::math::clapack_getri<nm::Complex64>,
|
1605
|
-
nm::math::clapack_getri<nm::Complex128>,
|
1606
|
-
#endif
|
1607
|
-
NULL, NULL, NULL, NULL /*
|
1608
|
-
nm::math::clapack_getri<nm::Rational32>,
|
1609
|
-
nm::math::clapack_getri<nm::Rational64>,
|
1610
|
-
nm::math::clapack_getri<nm::Rational128>,
|
1611
|
-
nm::math::clapack_getri<nm::RubyObject> */
|
1612
|
-
};
|
1613
|
-
|
1614
|
-
// Allocate the C version of the pivot index array
|
1615
|
-
int* ipiv_;
|
1616
|
-
if (TYPE(ipiv) != T_ARRAY) {
|
1617
|
-
rb_raise(rb_eArgError, "ipiv must be of type Array");
|
1618
|
-
} else {
|
1619
|
-
ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
|
1620
|
-
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
1621
|
-
ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
|
1622
|
-
}
|
1623
|
-
}
|
1624
|
-
|
1625
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1626
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1627
|
-
// FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
|
1628
|
-
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1629
|
-
} else {
|
1630
|
-
// Call either our version of getri or the LAPACK version.
|
1631
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
|
1632
|
-
}
|
1633
|
-
|
1634
|
-
return a;
|
1635
|
-
}
|
1636
|
-
|
1637
|
-
|
1638
|
-
/* Call any of the clapack_xpotri functions as directly as possible.
|
1639
|
-
*
|
1640
|
-
* You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
|
1641
|
-
* with its arguments?
|
1642
|
-
*
|
1643
|
-
* This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
|
1644
|
-
* handling, so you can easily crash Ruby!
|
1645
|
-
*
|
1646
|
-
* Returns an array giving the pivot indices (normally these are argument #5).
|
1647
|
-
*/
|
1648
|
-
static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
|
1649
|
-
#if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
|
1650
|
-
rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
|
1651
|
-
#endif
|
1652
|
-
|
1653
|
-
static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
|
1654
|
-
NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
|
1655
|
-
nm::math::clapack_potri<float>,
|
1656
|
-
nm::math::clapack_potri<double>,
|
1657
|
-
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
|
1658
|
-
clapack_cpotri, clapack_zpotri, // call directly, same function signature!
|
1659
|
-
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1660
|
-
nm::math::clapack_potri<nm::Complex64>,
|
1661
|
-
nm::math::clapack_potri<nm::Complex128>,
|
1662
|
-
#endif
|
1663
|
-
NULL, NULL, NULL, NULL /*
|
1664
|
-
nm::math::clapack_getri<nm::Rational32>,
|
1665
|
-
nm::math::clapack_getri<nm::Rational64>,
|
1666
|
-
nm::math::clapack_getri<nm::Rational128>,
|
1667
|
-
nm::math::clapack_getri<nm::RubyObject> */
|
1668
|
-
};
|
1669
|
-
|
1670
|
-
if (!ttable[NM_DTYPE(a)]) {
|
1671
|
-
rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
|
1672
|
-
// FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
|
1673
|
-
//rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
|
1674
|
-
} else {
|
1675
|
-
// Call either our version of getri or the LAPACK version.
|
1676
|
-
ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
|
1677
|
-
}
|
1678
|
-
|
1679
|
-
return a;
|
1680
|
-
}
|
1681
|
-
|
1682
|
-
|
1683
1033
|
/*
|
1684
1034
|
* Call any of the clapack_xlaswp functions as directly as possible.
|
1685
1035
|
*
|
@@ -1695,15 +1045,8 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
|
|
1695
1045
|
nm::math::clapack_laswp<int64_t>,
|
1696
1046
|
nm::math::clapack_laswp<float>,
|
1697
1047
|
nm::math::clapack_laswp<double>,
|
1698
|
-
//#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
|
1699
|
-
// clapack_claswp, clapack_zlaswp, // call directly, same function signature!
|
1700
|
-
//#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
|
1701
1048
|
nm::math::clapack_laswp<nm::Complex64>,
|
1702
1049
|
nm::math::clapack_laswp<nm::Complex128>,
|
1703
|
-
//#endif
|
1704
|
-
nm::math::clapack_laswp<nm::Rational32>,
|
1705
|
-
nm::math::clapack_laswp<nm::Rational64>,
|
1706
|
-
nm::math::clapack_laswp<nm::Rational128>,
|
1707
1050
|
nm::math::clapack_laswp<nm::RubyObject>
|
1708
1051
|
};
|
1709
1052
|
|
@@ -1735,6 +1078,20 @@ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dty
|
|
1735
1078
|
ttable[dtype](M, elements, lda, result);
|
1736
1079
|
}
|
1737
1080
|
|
1081
|
+
/*
|
1082
|
+
* C accessor for reducing a matrix to hessenberg form.
|
1083
|
+
*/
|
1084
|
+
void nm_math_hessenberg(VALUE a) {
|
1085
|
+
static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
|
1086
|
+
NULL, NULL, NULL, NULL, NULL, // does not support ints
|
1087
|
+
nm::math::hessenberg<float>,
|
1088
|
+
nm::math::hessenberg<double>,
|
1089
|
+
NULL, NULL, // does not support Complex
|
1090
|
+
NULL // no support for Ruby Object
|
1091
|
+
};
|
1092
|
+
|
1093
|
+
ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
|
1094
|
+
}
|
1738
1095
|
/*
|
1739
1096
|
* C accessor for calculating an in-place inverse.
|
1740
1097
|
*/
|