nmatrix 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nmatrix/data/complex.h +20 -55
  3. data/ext/nmatrix/data/data.cpp +11 -44
  4. data/ext/nmatrix/data/data.h +174 -311
  5. data/ext/nmatrix/data/meta.h +1 -7
  6. data/ext/nmatrix/data/ruby_object.h +3 -85
  7. data/ext/nmatrix/extconf.rb +2 -73
  8. data/ext/nmatrix/math.cpp +170 -813
  9. data/ext/nmatrix/math/asum.h +2 -25
  10. data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
  11. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  12. data/ext/nmatrix/math/gemm.h +2 -32
  13. data/ext/nmatrix/math/gemv.h +1 -35
  14. data/ext/nmatrix/math/getrf.h +21 -6
  15. data/ext/nmatrix/math/getrs.h +0 -8
  16. data/ext/nmatrix/math/imax.h +0 -22
  17. data/ext/nmatrix/math/long_dtype.h +0 -3
  18. data/ext/nmatrix/math/math.h +11 -337
  19. data/ext/nmatrix/math/nrm2.h +2 -23
  20. data/ext/nmatrix/math/rot.h +1 -25
  21. data/ext/nmatrix/math/rotg.h +4 -13
  22. data/ext/nmatrix/math/scal.h +0 -22
  23. data/ext/nmatrix/math/trsm.h +0 -55
  24. data/ext/nmatrix/math/util.h +148 -0
  25. data/ext/nmatrix/nmatrix.cpp +0 -14
  26. data/ext/nmatrix/nmatrix.h +92 -84
  27. data/ext/nmatrix/ruby_constants.cpp +0 -2
  28. data/ext/nmatrix/ruby_constants.h +0 -2
  29. data/ext/nmatrix/ruby_nmatrix.c +86 -45
  30. data/ext/nmatrix/storage/dense/dense.cpp +1 -7
  31. data/ext/nmatrix/storage/storage.h +0 -1
  32. data/ext/nmatrix/ttable_helper.rb +0 -6
  33. data/ext/nmatrix/util/io.cpp +1 -1
  34. data/lib/nmatrix.rb +1 -19
  35. data/lib/nmatrix/blas.rb +33 -11
  36. data/lib/nmatrix/io/market.rb +3 -3
  37. data/lib/nmatrix/lapack_core.rb +181 -0
  38. data/lib/nmatrix/lapack_plugin.rb +44 -0
  39. data/lib/nmatrix/math.rb +382 -131
  40. data/lib/nmatrix/monkeys.rb +2 -3
  41. data/lib/nmatrix/nmatrix.rb +166 -13
  42. data/lib/nmatrix/shortcuts.rb +72 -7
  43. data/lib/nmatrix/version.rb +2 -2
  44. data/spec/00_nmatrix_spec.rb +154 -5
  45. data/spec/02_slice_spec.rb +2 -6
  46. data/spec/03_nmatrix_monkeys_spec.rb +7 -1
  47. data/spec/blas_spec.rb +60 -33
  48. data/spec/homogeneous_spec.rb +10 -10
  49. data/spec/lapack_core_spec.rb +482 -0
  50. data/spec/math_spec.rb +436 -52
  51. data/spec/shortcuts_spec.rb +28 -4
  52. data/spec/spec_helper.rb +14 -2
  53. data/spec/utm5940.mtx +83844 -0
  54. metadata +49 -76
  55. data/.gitignore +0 -27
  56. data/.rspec +0 -2
  57. data/.travis.yml +0 -15
  58. data/CONTRIBUTING.md +0 -82
  59. data/Gemfile +0 -2
  60. data/History.txt +0 -677
  61. data/LICENSE.txt +0 -23
  62. data/Manifest.txt +0 -92
  63. data/README.rdoc +0 -150
  64. data/Rakefile +0 -216
  65. data/ext/nmatrix/data/rational.h +0 -440
  66. data/ext/nmatrix/math/geev.h +0 -82
  67. data/ext/nmatrix/math/ger.h +0 -96
  68. data/ext/nmatrix/math/gesdd.h +0 -80
  69. data/ext/nmatrix/math/gesvd.h +0 -78
  70. data/ext/nmatrix/math/getf2.h +0 -86
  71. data/ext/nmatrix/math/getri.h +0 -108
  72. data/ext/nmatrix/math/potrs.h +0 -129
  73. data/ext/nmatrix/math/swap.h +0 -52
  74. data/lib/nmatrix/lapack.rb +0 -240
  75. data/nmatrix.gemspec +0 -55
  76. data/scripts/mac-brew-gcc.sh +0 -50
  77. data/scripts/mac-mavericks-brew-gcc.sh +0 -22
  78. data/spec/lapack_spec.rb +0 -459
@@ -44,9 +44,6 @@ namespace nm {
44
44
  template <> struct ctype_to_dtype_enum<double> { static const nm::dtype_t value_type = nm::FLOAT64; };
45
45
  template <> struct ctype_to_dtype_enum<Complex64> { static const nm::dtype_t value_type = nm::COMPLEX64; };
46
46
  template <> struct ctype_to_dtype_enum<Complex128> { static const nm::dtype_t value_type = nm::COMPLEX128; };
47
- template <> struct ctype_to_dtype_enum<Rational32> { static const nm::dtype_t value_type = nm::RATIONAL32; };
48
- template <> struct ctype_to_dtype_enum<Rational64> { static const nm::dtype_t value_type = nm::RATIONAL64; };
49
- template <> struct ctype_to_dtype_enum<Rational128> { static const nm::dtype_t value_type = nm::RATIONAL128; };
50
47
  template <> struct ctype_to_dtype_enum<RubyObject> { static const nm::dtype_t value_type = nm::RUBYOBJ; };
51
48
 
52
49
 
@@ -60,11 +57,8 @@ namespace nm {
60
57
  template <> struct dtype_enum_T<nm::FLOAT64> { typedef double type; };
61
58
  template <> struct dtype_enum_T<nm::COMPLEX64> { typedef nm::Complex64 type; };
62
59
  template <> struct dtype_enum_T<nm::COMPLEX128> { typedef nm::Complex128 type; };
63
- template <> struct dtype_enum_T<nm::RATIONAL32> { typedef nm::Rational32 type; };
64
- template <> struct dtype_enum_T<nm::RATIONAL64> { typedef nm::Rational64 type; };
65
- template <> struct dtype_enum_T<nm::RATIONAL128> { typedef nm::Rational128 type; };
66
60
  template <> struct dtype_enum_T<nm::RUBYOBJ> { typedef nm::RubyObject type; };
67
61
 
68
62
  } // end namespace nm
69
63
 
70
- #endif
64
+ #endif
@@ -45,7 +45,7 @@
45
45
  /*
46
46
  * Macros
47
47
  */
48
- #define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX) or (TYPE(val) == T_RATIONAL))
48
+ #define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX))
49
49
  #define NMATRIX_CHECK_TYPE(val) \
50
50
  if (TYPE(val) != T_DATA || (RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) \
51
51
  rb_raise(rb_eTypeError, "Expected NMatrix on left-hand side of operation.");
@@ -76,12 +76,6 @@ class RubyObject {
76
76
  template <typename FloatType, typename = typename std::enable_if<std::is_floating_point<FloatType>::value>::type>
77
77
  inline RubyObject(const Complex<FloatType>& other) : rval(rb_complex_new(rb_float_new(other.r), rb_float_new(other.i))) {}
78
78
 
79
- /*
80
- * Rational number constructor.
81
- */
82
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
83
- inline RubyObject(const Rational<IntType>& other) : rval(rb_rational_new(INT2FIX(other.n), INT2FIX(other.d))) {}
84
-
85
79
  /*
86
80
  * Integer constructor.
87
81
  *
@@ -123,10 +117,6 @@ class RubyObject {
123
117
  inline operator double() const { RETURN_OBJ2NUM(NUM2DBL) }
124
118
  inline operator float() const { RETURN_OBJ2NUM(NUM2DBL) }
125
119
 
126
- inline operator Rational32() const { return this->to<Rational32>(); }
127
- inline operator Rational64() const { return this->to<Rational64>(); }
128
- inline operator Rational128() const { return this->to<Rational128>(); }
129
-
130
120
  inline operator Complex64() const { return this->to<Complex64>(); }
131
121
  inline operator Complex128() const { return this->to<Complex128>(); }
132
122
  /*
@@ -255,20 +245,6 @@ class RubyObject {
255
245
  return *this != RubyObject(other);
256
246
  }
257
247
  */
258
- //////////////////////////////
259
- // RUBY-RATIONAL OPERATIONS //
260
- //////////////////////////////
261
-
262
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
263
- inline bool operator==(const Rational<IntType>& other) const {
264
- return *this == RubyObject(other);
265
- }
266
-
267
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
268
- inline bool operator!=(const Rational<IntType>& other) const {
269
- return *this != RubyObject(other);
270
- }
271
-
272
248
  //////////////////////////////
273
249
  // RUBY-COMPLEX OPERATIONS //
274
250
  //////////////////////////////
@@ -304,7 +280,7 @@ class RubyObject {
304
280
  */
305
281
  template <typename ComplexType>
306
282
  inline typename std::enable_if<made_from_same_template<ComplexType, Complex64>::value, ComplexType>::type to(void) const {
307
- if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT or TYPE(this->rval) == T_RATIONAL) {
283
+ if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT) {
308
284
  return ComplexType(NUM2DBL(this->rval));
309
285
 
310
286
  } else if (TYPE(this->rval) == T_COMPLEX) {
@@ -314,25 +290,8 @@ class RubyObject {
314
290
  rb_raise(rb_eTypeError, "Invalid conversion to Complex type.");
315
291
  }
316
292
  }
317
-
318
- /*
319
- * Convert a Ruby object to a rational number.
320
- */
321
- template <typename RationalType>
322
- inline typename std::enable_if<made_from_same_template<RationalType, Rational32>::value, RationalType>::type to(void) const {
323
- if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT or TYPE(this->rval) == T_COMPLEX) {
324
- return RationalType(NUM2INT(this->rval));
325
-
326
- } else if (TYPE(this->rval) == T_RATIONAL) {
327
- return RationalType(NUM2INT(rb_funcall(this->rval, nm_rb_numer, 0)), NUM2INT(rb_funcall(this->rval, nm_rb_denom, 0)));
328
-
329
- } else {
330
- rb_raise(rb_eTypeError, "Invalid conversion to Rational type.");
331
- }
332
- }
333
-
334
293
  };
335
-
294
+
336
295
  // Negative operator
337
296
  inline RubyObject operator-(const RubyObject& rhs) {
338
297
  return RubyObject(rb_funcall(rhs.rval, nm_rb_negate, 0));
@@ -413,47 +372,6 @@ inline bool operator>(const Complex<FloatType>& left, const RubyObject& right) {
413
372
  return RubyObject(left) > right;
414
373
  }
415
374
 
416
-
417
-
418
- //////////////////////////////
419
- // RATIONAL-RUBY OPERATIONS //
420
- //////////////////////////////
421
-
422
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
423
- inline bool operator==(const Rational<IntType>& left, const RubyObject& right) {
424
- return RubyObject(left) == right;
425
- }
426
-
427
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
428
- inline bool operator!=(const Rational<IntType>& left, const RubyObject& right) {
429
- return RubyObject(left) != right;
430
- }
431
-
432
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
433
- inline bool operator>=(const Rational<IntType>& left, const RubyObject& right) {
434
- return RubyObject(left) >= right;
435
- }
436
-
437
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
438
- inline bool operator<=(const Rational<IntType>& left, const RubyObject& right) {
439
- return RubyObject(left) <= right;
440
- }
441
-
442
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
443
- inline bool operator<(const Rational<IntType>& left, const RubyObject& right) {
444
- return RubyObject(left) < right;
445
- }
446
-
447
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
448
- inline bool operator>(const Rational<IntType>& left, const RubyObject& right) {
449
- return RubyObject(left) > right;
450
- }
451
-
452
- inline std::ostream& operator<<(std::ostream& out, const RubyObject& rhs) {
453
- out << "RUBYOBJECT" << std::flush; // FIXME: Try calling inspect or something on the Ruby object if we really need to debug it.
454
- return out;
455
- }
456
-
457
375
  } // end of namespace nm
458
376
 
459
377
  namespace std {
@@ -155,83 +155,12 @@ else
155
155
  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
156
156
  end
157
157
 
158
- # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
159
-
160
- # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
161
- # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
162
- # for ATLAS. The same for all the others
163
- #
164
- #dir_config("clapack", ["/usr/local/atlas/include"], [])
165
- #
166
- #
167
-
168
- # Is g++ having trouble finding your header files?
169
- # Try this:
170
- # export C_INCLUDE_PATH=/usr/local/atlas/include
171
- # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
172
- # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
173
-
174
- idefaults = {lapack: ["/usr/include/atlas"],
175
- cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
176
- atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
177
-
178
- # For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
179
- # exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
180
- # that Dir.exists?(d) for each.
181
- ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
182
- cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
183
- atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
184
-
185
- if have_library("clapack") # Usually only applies for Mac OS X
186
- $libs += " -lclapack "
187
- end
188
-
189
- unless have_library("lapack")
190
- dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
191
- end
192
-
193
- unless have_library("cblas")
194
- dir_config("cblas", idefaults[:cblas], ldefaults[:cblas])
195
- end
196
-
197
- unless have_library("atlas")
198
- dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
199
- end
200
-
201
- # If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
202
- # we try our luck with the default location.
203
- if have_header("atlas/cblas.h")
204
- have_header("atlas/clapack.h")
205
- else
206
- have_header("cblas.h")
207
- have_header("clapack.h")
208
- end
209
-
210
-
211
- # Although have_func is supposed to take a list as its second argument, I find that it simply
212
- # applies a :to_s to the second arg and doesn't actually check each one. We may want to put
213
- # have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and
214
- # lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. --@mohawkjohn 8/20/14
215
- have_func("clapack_dgetrf", "atlas/clapack.h")
216
- have_func("clapack_dgetri", "atlas/clapack.h")
217
- have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
218
-
219
- have_func("cblas_dgemm", "cblas.h")
220
-
221
- #have_func("rb_scan_args", "ruby.h")
222
-
223
- #find_library("lapack", "clapack_dgetrf")
224
- #find_library("cblas", "cblas_dgemm")
225
- #find_library("atlas", "ATL_dgemmNN")
226
- # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
227
- $libs += " -llapack -lcblas -latlas "
228
158
  #$libs += " -lprofiler "
229
159
 
230
-
231
160
  # For release, these next two should both be changed to -O3.
232
- $CFLAGS += " -O3" #" -O0 -g "
161
+ $CFLAGS += " -O3 "
233
162
  #$CFLAGS += " -static -O0 -g "
234
- $CPPFLAGS += " -O3 -std=#{$CPP_STANDARD}" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
163
+ $CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
235
164
  #$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
236
165
 
237
166
  CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
@@ -23,90 +23,103 @@
23
23
  //
24
24
  // == math.cpp
25
25
  //
26
- // Ruby-exposed BLAS functions.
26
+ // Ruby-exposed CBLAS and LAPACK functions that are available without
27
+ // an external library.
27
28
  //
28
- // === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
29
+ // === Procedure for adding CBLAS functions to math.cpp/math.h:
29
30
  //
30
31
  // This procedure is written as if for a fictional function with double
31
- // version dbacon, which we'll say is from LAPACK.
32
+ // version dbacon, which we'll say is from CBLAS.
32
33
  //
33
34
  // 1. Write a default templated version which probably returns a boolean.
34
35
  // Call it bacon, and put it in math.h.
35
36
  //
36
- // Order will always be row-major, so we don't need to pass that.
37
- // CBLAS_TRANSPOSE-type arguments, however, should be passed.
38
- //
39
- // Otherwise, arguments should look like those in cblas.h or clapack.h:
40
- //
41
37
  // template <typename DType>
42
38
  // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
43
39
  // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
44
40
  // }
45
41
  //
42
+ // Make sure this is in namespace nm::math
43
+ //
46
44
  // 2. In math.cpp, add a templated inline static version of the function which takes
47
- // only void* pointers and uses reinterpret_cast to convert them to the
48
- // proper dtype.
45
+ // only void* pointers and uses static_cast to convert them to the
46
+ // proper dtype. This should also be in namespace nm::math
49
47
  //
50
48
  // This function may also need to switch m and n if these arguments are given.
51
49
  //
52
50
  // For an example, see cblas_gemm. This function should do nothing other than cast
53
- // appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
51
+ // appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
54
52
  // all take void* only, and no other pointers that vary between functions, you can skip
55
53
  // this particular step -- as we can call them directly using a custom function pointer
56
54
  // array (same function signature!).
57
55
  //
58
- // This version of the function will be the one exposed through NMatrix::LAPACK. We
59
- // want it to be as close to the actual LAPACK version of the function as possible,
56
+ // This version of the function will be the one exposed through NMatrix::BLAS. We
57
+ // want it to be as close to the actual BLAS version of the function as possible,
60
58
  // and with as few checks as possible.
61
59
  //
62
60
  // You will probably need a forward declaration in the extern "C" block.
63
61
  //
64
62
  // Note: In that case, the function you wrote in Step 1 should also take exactly the
65
- // same arguments as clapack_xbacon. Otherwise Bad Things will happen.
63
+ // same arguments as cblas_xbacon. Otherwise Bad Things will happen.
66
64
  //
67
- // 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
65
+ // 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
66
+ // call nm::math::bacon()) and also
67
+ // inline specialized versions of bacon for the different BLAS types.
68
+ // This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
69
+ // of bacon from whatever external library is available, as well as the internal version
70
+ // if an external version is not available. These functions will end up in a namespace
71
+ // like nm::math::atlas, but don't explicitly put them in a namespace, they will get
72
+ // put in the appropriate namespace when cblas_templates_core.h is included.
68
73
  //
69
- // You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
70
- // Or you may prefer to do it by hand:
74
+ // template <typename DType>
75
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
76
+ // return nm::math::bacon(trans, M, N, A, ...);
77
+ // }
71
78
  //
72
79
  // template <>
73
80
  // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
74
- // clapack_sbacon(trans, M, N, A, ...);
81
+ // cblas_sbacon(trans, M, N, A, ...);
75
82
  // return true;
76
83
  // }
77
84
  //
78
- // Make sure these functions are in the namespace nm::math.
79
- //
80
85
  // Note that you should do everything in your power here to parse any return values
81
- // clapack_sbacon may give you. We're not trying very hard in this example, but you might
86
+ // cblas_sbacon may give you. We're not trying very hard in this example, but you might
82
87
  // look at getrf to see how it might be done.
83
88
  //
84
- // 4. Expose the function in nm_math_init_blas(), in math.cpp:
89
+ // 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
90
+ // of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
91
+ // and math_lapacke.cpp
85
92
  //
86
- // rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
93
+ // 5. Expose the function in nm_math_init_blas(), in math.cpp:
87
94
  //
88
- // Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
95
+ // rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
89
96
  //
90
- // 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
97
+ // Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
98
+ // to the plugin gems.
99
+ //
100
+ // Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
101
+ //
102
+ // 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
91
103
  // sanity checks we left out in step 2.
92
104
  //
93
- // 6. Write tests for NMatrix::LAPACK::getrf, confirming that it works for the ATLAS dtypes.
105
+ // 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
94
106
  //
95
- // 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
107
+ // 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
96
108
  // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
97
109
  // inserts a lot of unnecessary stuff.
98
110
  //
99
111
  // Copy and paste the output into the default templated function you wrote in Step 1.
100
112
  // Fix it so it works as a template instead of just for doubles.
101
113
  //
102
- // 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
114
+ // Because of step 3, this will automatically also work for the nmatrix-atlas
115
+ // and nmatrix-lapacke implementations.
116
+ //
117
+ // 9. Write tests to confirm that it works for all data types.
103
118
  //
104
- // 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
119
+ // 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
105
120
  // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
106
121
  //
107
- // 10. Pull request!
108
-
109
-
122
+ // 11. Pull request!
110
123
 
111
124
  /*
112
125
  * Project Includes
@@ -117,30 +130,23 @@
117
130
  #include <limits>
118
131
  #include <cmath>
119
132
 
120
- #include "math/inc.h"
133
+ #include "math/cblas_enums.h"
134
+
121
135
  #include "data/data.h"
122
- #include "math/gesdd.h"
123
- #include "math/gesvd.h"
124
- #include "math/geev.h"
125
- #include "math/swap.h"
126
136
  #include "math/imax.h"
127
137
  #include "math/scal.h"
128
- #include "math/ger.h"
129
- #include "math/getf2.h"
130
138
  #include "math/laswp.h"
131
139
  #include "math/trsm.h"
132
- #include "math/long_dtype.h" // for gemm.h
133
140
  #include "math/gemm.h"
134
141
  #include "math/gemv.h"
135
142
  #include "math/asum.h"
136
143
  #include "math/nrm2.h"
137
144
  #include "math/getrf.h"
138
- #include "math/getri.h"
139
145
  #include "math/getrs.h"
140
- #include "math/potrs.h"
141
146
  #include "math/rot.h"
142
147
  #include "math/rotg.h"
143
148
  #include "math/math.h"
149
+ #include "math/util.h"
144
150
  #include "storage/dense/dense.h"
145
151
 
146
152
  #include "nmatrix.h"
@@ -151,12 +157,6 @@
151
157
  */
152
158
 
153
159
  extern "C" {
154
- #if defined HAVE_CLAPACK_H
155
- #include <clapack.h>
156
- #elif defined HAVE_ATLAS_CLAPACK_H
157
- #include <atlas/clapack.h>
158
- #endif
159
-
160
160
  /* BLAS Level 1. */
161
161
  static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
162
162
  static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
@@ -166,34 +166,20 @@ extern "C" {
166
166
  static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
167
167
 
168
168
  /* BLAS Level 2. */
169
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
170
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
171
+
169
172
  /* BLAS Level 3. */
170
173
  static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
171
174
  VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
172
- static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
173
- VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
174
175
  static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
175
176
  VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
176
- static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
177
- VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
178
- static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
179
- VALUE lda, VALUE beta, VALUE c, VALUE ldc);
180
- static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
181
- VALUE lda, VALUE beta, VALUE c, VALUE ldc);
182
177
 
183
178
  /* LAPACK. */
184
179
  static VALUE nm_has_clapack(VALUE self);
185
180
  static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
186
- static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
187
181
  static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
188
- static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
189
- static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
190
- static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
191
182
  static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
192
- static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
193
-
194
- static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
195
- static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
196
- static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
197
183
  } // end of extern "C" block
198
184
 
199
185
  ////////////////////
@@ -230,6 +216,11 @@ namespace nm {
230
216
  }
231
217
  }
232
218
 
219
+ //we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive
220
+ template <>
221
+ void det_exact<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
222
+ rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
223
+ }
233
224
 
234
225
  /*
235
226
  * Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
@@ -308,6 +299,80 @@ namespace nm {
308
299
  delete[] col_index;
309
300
  }
310
301
 
302
+ /*
303
+ * Reduce a square matrix to Hessenberg form with Householder transforms
304
+ *
305
+ * == Arguments
306
+ *
307
+ * nrows - The number of rows present in matrix a.
308
+ * a_elements - Elements of the matrix to be reduced in 1D array form.
309
+ *
310
+ * == References
311
+ *
312
+ * http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
313
+ * This code has been included by permission of the author.
314
+ */
315
+ template <typename DType>
316
+ void hessenberg(const int nrows, void* a_elements) {
317
+ DType* a = reinterpret_cast<DType*>(a_elements);
318
+ DType* u = new DType[nrows]; // auxiliary storage for the chosen vector
319
+ DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
320
+ int i, k, col;
321
+
322
+ // For each column use a Householder transformation to zero all entries
323
+ // below the subdiagonal.
324
+ for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
325
+ col++) {
326
+ // Calculate the signed square root of the sum of squares of the
327
+ // elements below the diagonal.
328
+
329
+ for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
330
+ p_a += nrows, i++) {
331
+ sum_of_squares += *p_a * *p_a;
332
+ }
333
+ if (sum_of_squares == 0.0) { continue; }
334
+ sum_of_squares = std::sqrt(sum_of_squares);
335
+
336
+ if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }
337
+
338
+ // Calculate the Householder transformation Q = I - 2uu'/u'u.
339
+ u[col + 1] = *psubdiag - sum_of_squares;
340
+ *psubdiag = sum_of_squares;
341
+
342
+ for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
343
+ u[i] = *p_a;
344
+ *p_a = 0.0;
345
+ }
346
+
347
+ // Premultiply A by Q
348
+ scale = -1.0 / (sum_of_squares * u[col+1]);
349
+ for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
350
+ p_a = a + nrows * (col + 1) + i;
351
+ for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
352
+ innerproduct += u[k] * *p_a;
353
+ }
354
+ innerproduct *= scale;
355
+ for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
356
+ *p_a -= u[k] * innerproduct;
357
+ }
358
+ }
359
+
360
+ // Postmultiply QA by Q
361
+ for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
362
+ for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
363
+ innerproduct += u[k] * *(p_row + k);
364
+ }
365
+ innerproduct *= scale;
366
+
367
+ for (k = col + 1; k < nrows; k++) {
368
+ *(p_row + k) -= u[k] * innerproduct;
369
+ }
370
+ }
371
+ }
372
+
373
+ delete[] u;
374
+ }
375
+
311
376
  /*
312
377
  * Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
313
378
  */
@@ -318,6 +383,10 @@ namespace nm {
318
383
 
319
384
  if (M == 2) {
320
385
  DType det = A[0] * A[lda+1] - A[1] * A[lda];
386
+ if (det == 0) {
387
+ rb_raise(nm_eNotInvertibleError,
388
+ "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
389
+ }
321
390
  B[0] = A[lda+1] / det;
322
391
  B[1] = -A[1] / det;
323
392
  B[ldb] = -A[lda] / det;
@@ -328,7 +397,8 @@ namespace nm {
328
397
  DType det;
329
398
  det_exact<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
330
399
  if (det == 0) {
331
- rb_raise(nm_eNotInvertibleError, "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
400
+ rb_raise(nm_eNotInvertibleError,
401
+ "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
332
402
  }
333
403
 
334
404
  B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
@@ -347,22 +417,6 @@ namespace nm {
347
417
  }
348
418
  }
349
419
 
350
- /*
351
- * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
352
- */
353
- template <typename DType, typename CType>
354
- inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
355
- return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
356
- }
357
-
358
- /*
359
- * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
360
- */
361
- template <typename DType, typename CType>
362
- inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
363
- return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
364
- }
365
-
366
420
  /*
367
421
  * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
368
422
  *
@@ -422,39 +476,6 @@ namespace nm {
422
476
  reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
423
477
  }
424
478
 
425
-
426
- /*
427
- * Function signature conversion for calling CBLAS' trmm functions as directly as possible.
428
- *
429
- * For documentation: http://www.netlib.org/blas/dtrmm.f
430
- */
431
- template <typename DType>
432
- inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
433
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
434
- const void* A, const int lda, void* B, const int ldb)
435
- {
436
- trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
437
- reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
438
- }
439
-
440
-
441
- /*
442
- * Function signature conversion for calling CBLAS' syrk functions as directly as possible.
443
- *
444
- * For documentation: http://www.netlib.org/blas/dsyrk.f
445
- */
446
- template <typename DType>
447
- inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
448
- const int n, const int k, const void* alpha,
449
- const void* A, const int lda, const void* beta, void* C, const int ldc)
450
- {
451
- syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
452
- reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
453
- }
454
-
455
-
456
-
457
-
458
479
  }
459
480
  } // end of namespace nm::math
460
481
 
@@ -466,80 +487,29 @@ extern "C" {
466
487
  ///////////////////
467
488
 
468
489
  void nm_math_init_blas() {
469
- cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
490
+ VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");
470
491
 
471
492
  rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);
472
493
 
473
- /* ATLAS-CLAPACK Functions */
474
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
475
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
476
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
477
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
478
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
479
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
480
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
481
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
482
-
483
- /* Non-ATLAS regular LAPACK Functions called via Fortran interface */
484
- rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
485
- rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
486
- rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
487
-
488
- cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
489
-
490
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
491
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
492
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
493
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
494
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
495
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
496
-
497
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
498
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
499
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
500
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
501
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
502
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
503
- }
504
-
505
- /*
506
- * Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
507
- *
508
- * Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
509
- */
510
- static inline char lapack_svd_job_sym(VALUE op) {
511
- if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
512
- else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
513
- else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
514
- else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
515
- else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
516
- return 'a';
517
- }
494
+ VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");
518
495
 
496
+ /* ATLAS-CLAPACK Functions that are implemented internally */
497
+ rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
498
+ rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
499
+ rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
519
500
 
520
- /*
521
- * Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
522
- *
523
- * Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
524
- */
525
- static inline char lapack_evd_job_sym(VALUE op) {
526
- if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
527
- else return 'V';
528
- }
501
+ VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");
529
502
 
503
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
504
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
505
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
506
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
507
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
508
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
530
509
 
531
- /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
532
- * into an enum recognized by cblas.
533
- *
534
- * Called by nm_cblas_gemm -- basically inline.
535
- *
536
- */
537
- static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
538
- if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
539
- else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
540
- else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
541
- else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
542
- return CblasNoTrans;
510
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
511
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
512
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
543
513
  }
544
514
 
545
515
  /*
@@ -572,55 +542,6 @@ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE
572
542
  return vector;
573
543
  }
574
544
 
575
- /*
576
- * Interprets cblas argument which could be :left or :right
577
- *
578
- * Called by nm_cblas_trsm -- basically inline
579
- */
580
- static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
581
- ID op_id = rb_to_id(op);
582
- if (op_id == nm_rb_left) return CblasLeft;
583
- if (op_id == nm_rb_right) return CblasRight;
584
- rb_raise(rb_eArgError, "Expected :left or :right for side argument");
585
- return CblasLeft;
586
- }
587
-
588
- /*
589
- * Interprets cblas argument which could be :upper or :lower
590
- *
591
- * Called by nm_cblas_trsm -- basically inline
592
- */
593
- static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
594
- ID op_id = rb_to_id(op);
595
- if (op_id == nm_rb_upper) return CblasUpper;
596
- if (op_id == nm_rb_lower) return CblasLower;
597
- rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
598
- return CblasUpper;
599
- }
600
-
601
-
602
- /*
603
- * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
604
- *
605
- * Called by nm_cblas_trsm -- basically inline
606
- */
607
- static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
608
- if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
609
- return CblasNonUnit;
610
- }
611
-
612
- /*
613
- * Interprets cblas argument which could be :row or :col
614
- */
615
- static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
616
- if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
617
- else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
618
- rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
619
- rb_raise(rb_eArgError, "Expected :row or :col for order argument");
620
- return CblasRowMajor;
621
- }
622
-
623
-
624
545
  /*
625
546
  * Call any of the cblas_xrotg functions as directly as possible.
626
547
  *
@@ -641,10 +562,6 @@ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
641
562
  * The outputs [c,s] will be returned in a Ruby Array at the end; the input
642
563
  * NMatrix will also be modified in-place.
643
564
  *
644
- * If you provide rationals, be aware that there's a high probability of an
645
- * error, since rotg includes a square root -- and most rationals' square roots
646
- * are irrational. You're better off converting to Float first.
647
- *
648
565
  * This function, like the other cblas_ functions, does minimal type-checking.
649
566
  */
650
567
  static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
@@ -654,14 +571,13 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
654
571
  nm::math::cblas_rotg<double>,
655
572
  nm::math::cblas_rotg<nm::Complex64>,
656
573
  nm::math::cblas_rotg<nm::Complex128>,
657
- NULL, NULL, NULL, // no rationals
658
574
  NULL //nm::math::cblas_rotg<nm::RubyObject>
659
575
  };
660
576
 
661
577
  nm::dtype_t dtype = NM_DTYPE(ab);
662
578
 
663
579
  if (!ttable[dtype]) {
664
- rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
580
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
665
581
  return Qnil;
666
582
 
667
583
  } else {
@@ -723,9 +639,6 @@ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VAL
723
639
  nm::math::cblas_rot<double,double>,
724
640
  nm::math::cblas_rot<nm::Complex64,float>,
725
641
  nm::math::cblas_rot<nm::Complex128,double>,
726
- nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
727
- nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
728
- nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
729
642
  nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
730
643
  };
731
644
 
@@ -792,17 +705,13 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
792
705
  nm::math::cblas_nrm2<float64_t,float64_t>,
793
706
  nm::math::cblas_nrm2<float32_t,nm::Complex64>,
794
707
  nm::math::cblas_nrm2<float64_t,nm::Complex128>,
795
- //nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
796
- //nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
797
- //nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
798
- NULL, NULL, NULL,
799
708
  nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
800
709
  };
801
710
 
802
711
  nm::dtype_t dtype = NM_DTYPE(x);
803
712
 
804
713
  if (!ttable[dtype]) {
805
- rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
714
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
806
715
  return Qnil;
807
716
 
808
717
  } else {
@@ -850,9 +759,6 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
850
759
  nm::math::cblas_asum<float64_t,float64_t>,
851
760
  nm::math::cblas_asum<float32_t,nm::Complex64>,
852
761
  nm::math::cblas_asum<float64_t,nm::Complex128>,
853
- nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
854
- nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
855
- nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
856
762
  nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
857
763
  };
858
764
 
@@ -1000,10 +906,8 @@ static VALUE nm_cblas_trsm(VALUE self,
1000
906
  NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1001
907
  nm::math::cblas_trsm<float>,
1002
908
  nm::math::cblas_trsm<double>,
1003
- cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
1004
- nm::math::cblas_trsm<nm::Rational32>,
1005
- nm::math::cblas_trsm<nm::Rational64>,
1006
- nm::math::cblas_trsm<nm::Rational128>,
909
+ nm::math::cblas_trsm<nm::Complex64>,
910
+ nm::math::cblas_trsm<nm::Complex128>,
1007
911
  nm::math::cblas_trsm<nm::RubyObject>
1008
912
  };
1009
913
 
@@ -1021,358 +925,6 @@ static VALUE nm_cblas_trsm(VALUE self,
1021
925
  return Qtrue;
1022
926
  }
1023
927
 
1024
-
1025
- static VALUE nm_cblas_trmm(VALUE self,
1026
- VALUE order,
1027
- VALUE side, VALUE uplo,
1028
- VALUE trans_a, VALUE diag,
1029
- VALUE m, VALUE n,
1030
- VALUE alpha,
1031
- VALUE a, VALUE lda,
1032
- VALUE b, VALUE ldb)
1033
- {
1034
- static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
1035
- const enum CBLAS_SIDE, const enum CBLAS_UPLO,
1036
- const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
1037
- const int m, const int n, const void* alpha, const void* a,
1038
- const int lda, void* b, const int ldb) = {
1039
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1040
- nm::math::cblas_trmm<float>,
1041
- nm::math::cblas_trmm<double>,
1042
- cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
1043
- /*
1044
- nm::math::cblas_trmm<nm::Rational32>,
1045
- nm::math::cblas_trmm<nm::Rational64>,
1046
- nm::math::cblas_trmm<nm::Rational128>,
1047
- nm::math::cblas_trmm<nm::RubyObject>*/
1048
- };
1049
-
1050
- nm::dtype_t dtype = NM_DTYPE(a);
1051
-
1052
- if (!ttable[dtype]) {
1053
- rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
1054
- } else {
1055
- void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1056
- rubyval_to_cval(alpha, dtype, pAlpha);
1057
-
1058
- ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1059
- }
1060
-
1061
- return b;
1062
- }
1063
-
1064
-
1065
- static VALUE nm_cblas_syrk(VALUE self,
1066
- VALUE order,
1067
- VALUE uplo,
1068
- VALUE trans,
1069
- VALUE n, VALUE k,
1070
- VALUE alpha,
1071
- VALUE a, VALUE lda,
1072
- VALUE beta,
1073
- VALUE c, VALUE ldc)
1074
- {
1075
- static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
1076
- const int n, const int k, const void* alpha, const void* a,
1077
- const int lda, const void* beta, void* c, const int ldc) = {
1078
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1079
- nm::math::cblas_syrk<float>,
1080
- nm::math::cblas_syrk<double>,
1081
- cblas_csyrk, cblas_zsyrk// call directly, same function signature!
1082
- /*nm::math::cblas_trsm<nm::Rational32>,
1083
- nm::math::cblas_trsm<nm::Rational64>,
1084
- nm::math::cblas_trsm<nm::Rational128>,
1085
- nm::math::cblas_trsm<nm::RubyObject>*/
1086
- };
1087
-
1088
- nm::dtype_t dtype = NM_DTYPE(a);
1089
-
1090
- if (!ttable[dtype]) {
1091
- rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1092
- } else {
1093
- void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
1094
- *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1095
- rubyval_to_cval(alpha, dtype, pAlpha);
1096
- rubyval_to_cval(beta, dtype, pBeta);
1097
-
1098
- ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1099
- }
1100
-
1101
- return Qtrue;
1102
- }
1103
-
1104
-
1105
- static VALUE nm_cblas_herk(VALUE self,
1106
- VALUE order,
1107
- VALUE uplo,
1108
- VALUE trans,
1109
- VALUE n, VALUE k,
1110
- VALUE alpha,
1111
- VALUE a, VALUE lda,
1112
- VALUE beta,
1113
- VALUE c, VALUE ldc)
1114
- {
1115
-
1116
- nm::dtype_t dtype = NM_DTYPE(a);
1117
-
1118
- if (dtype == nm::COMPLEX64) {
1119
- cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1120
- } else if (dtype == nm::COMPLEX128) {
1121
- cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1122
- } else
1123
- rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
1124
- return Qtrue;
1125
- }
1126
-
1127
-
1128
- /*
1129
- * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
1130
- *
1131
- * xGESVD computes the singular value decomposition (SVD) of a real
1132
- * M-by-N matrix A, optionally computing the left and/or right singular
1133
- * vectors. The SVD is written
1134
- *
1135
- * A = U * SIGMA * transpose(V)
1136
- *
1137
- * where SIGMA is an M-by-N matrix which is zero except for its
1138
- * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1139
- * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1140
- * are the singular values of A; they are real and non-negative, and
1141
- * are returned in descending order. The first min(m,n) columns of
1142
- * U and V are the left and right singular vectors of A.
1143
- *
1144
- * Note that the routine returns V**T, not V.
1145
- */
1146
- static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1147
- static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
1148
- NULL, NULL, NULL, NULL, NULL, // no integer ops
1149
- nm::math::lapack_gesvd<float,float>,
1150
- nm::math::lapack_gesvd<double,double>,
1151
- nm::math::lapack_gesvd<nm::Complex64,float>,
1152
- nm::math::lapack_gesvd<nm::Complex128,double>,
1153
- NULL, NULL, NULL, NULL // no rationals or Ruby objects
1154
- };
1155
-
1156
- nm::dtype_t dtype = NM_DTYPE(a);
1157
-
1158
-
1159
- if (!gesvd_table[dtype]) {
1160
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1161
- return Qfalse;
1162
- } else {
1163
- int M = FIX2INT(m),
1164
- N = FIX2INT(n);
1165
-
1166
- int min_mn = NM_MIN(M,N);
1167
- int max_mn = NM_MAX(M,N);
1168
-
1169
- char JOBU = lapack_svd_job_sym(jobu),
1170
- JOBVT = lapack_svd_job_sym(jobvt);
1171
-
1172
- // only need rwork for complex matrices
1173
- int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
1174
- void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
1175
- int work_size = FIX2INT(lwork);
1176
-
1177
- // ignore user argument for lwork if it's too small.
1178
- work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
1179
- void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1180
-
1181
- int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1182
- NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1183
- work, work_size, rwork);
1184
- return INT2FIX(info);
1185
- }
1186
- }
1187
-
1188
- /*
1189
- * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
1190
- *
1191
- * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
1192
- * M-by-N matrix A, optionally computing the left and/or right singular
1193
- * vectors. The SVD is written
1194
- *
1195
- * A = U * SIGMA * transpose(V)
1196
- *
1197
- * where SIGMA is an M-by-N matrix which is zero except for its
1198
- * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1199
- * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1200
- * are the singular values of A; they are real and non-negative, and
1201
- * are returned in descending order. The first min(m,n) columns of
1202
- * U and V are the left and right singular vectors of A.
1203
- *
1204
- * Note that the routine returns V**T, not V.
1205
- */
1206
- static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1207
- static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
1208
- NULL, NULL, NULL, NULL, NULL, // no integer ops
1209
- nm::math::lapack_gesdd<float,float>,
1210
- nm::math::lapack_gesdd<double,double>,
1211
- nm::math::lapack_gesdd<nm::Complex64,float>,
1212
- nm::math::lapack_gesdd<nm::Complex128,double>,
1213
- NULL, NULL, NULL, NULL // no rationals or Ruby objects
1214
- };
1215
-
1216
- nm::dtype_t dtype = NM_DTYPE(a);
1217
-
1218
- if (!gesdd_table[dtype]) {
1219
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1220
- return Qfalse;
1221
- } else {
1222
- int M = FIX2INT(m),
1223
- N = FIX2INT(n);
1224
-
1225
- int min_mn = NM_MIN(M,N);
1226
- int max_mn = NM_MAX(M,N);
1227
-
1228
- char JOBZ = lapack_svd_job_sym(jobz);
1229
-
1230
- // only need rwork for complex matrices
1231
- void* rwork = NULL;
1232
-
1233
- int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
1234
- if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
1235
- int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
1236
- rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
1237
-
1238
- if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
1239
- else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
1240
- else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
1241
- } else {
1242
- if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
1243
- else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
1244
- else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
1245
- }
1246
- void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1247
- int* iwork = NM_ALLOCA_N(int, 8*min_mn);
1248
-
1249
- int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1250
- NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1251
- work, work_size, iwork, rwork);
1252
- return INT2FIX(info);
1253
- }
1254
- }
1255
-
1256
-
1257
- /*
1258
- * Function signature conversion for calling CBLAS' geev functions as directly as possible.
1259
- *
1260
- * GEEV computes for an N-by-N real nonsymmetric matrix A, the
1261
- * eigenvalues and, optionally, the left and/or right eigenvectors.
1262
- *
1263
- * The right eigenvector v(j) of A satisfies
1264
- * A * v(j) = lambda(j) * v(j)
1265
- * where lambda(j) is its eigenvalue.
1266
- *
1267
- * The left eigenvector u(j) of A satisfies
1268
- * u(j)**H * A = lambda(j) * u(j)**H
1269
- * where u(j)**H denotes the conjugate transpose of u(j).
1270
- *
1271
- * The computed eigenvectors are normalized to have Euclidean norm
1272
- * equal to 1 and largest component real.
1273
- */
1274
- static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
1275
- static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
1276
- NULL, NULL, NULL, NULL, NULL, // no integer ops
1277
- nm::math::lapack_geev<float,float>,
1278
- nm::math::lapack_geev<double,double>,
1279
- nm::math::lapack_geev<nm::Complex64,float>,
1280
- nm::math::lapack_geev<nm::Complex128,double>,
1281
- NULL, NULL, NULL, NULL // no rationals or Ruby objects
1282
- };
1283
-
1284
- nm::dtype_t dtype = NM_DTYPE(a);
1285
-
1286
-
1287
- if (!geev_table[dtype]) {
1288
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1289
- return Qfalse;
1290
- } else {
1291
- int N = FIX2INT(n);
1292
-
1293
- char JOBVL = lapack_evd_job_sym(compute_left),
1294
- JOBVR = lapack_evd_job_sym(compute_right);
1295
-
1296
- void* A = NM_STORAGE_DENSE(a)->elements;
1297
- void* WR = NM_STORAGE_DENSE(w)->elements;
1298
- void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
1299
- void* VL = NM_STORAGE_DENSE(vl)->elements;
1300
- void* VR = NM_STORAGE_DENSE(vr)->elements;
1301
-
1302
- // only need rwork for complex matrices (wi == Qnil for complex)
1303
- int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
1304
- void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
1305
- int work_size = FIX2INT(lwork);
1306
- void* work;
1307
-
1308
- int info;
1309
-
1310
- // if work size is 0 or -1, query.
1311
- if (work_size <= 0) {
1312
- work_size = -1;
1313
- work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
1314
- info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1315
- work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
1316
- // line above is basically: work_size = (int)(work[0]); // now have new work_size
1317
- NM_FREE(work);
1318
- if (info == 0)
1319
- rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
1320
- else return INT2FIX(info); // error of some kind on query!
1321
- }
1322
-
1323
- // if work size is < 2*N, just set it to 2*N
1324
- if (work_size < 2*N) work_size = 2*N;
1325
- if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
1326
- work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
1327
- }
1328
-
1329
- // Allocate work array for actual run
1330
- work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
1331
-
1332
- // Perform the actual calculation.
1333
- info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1334
-
1335
- return INT2FIX(info);
1336
- }
1337
- }
1338
-
1339
-
1340
- static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1341
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1342
- /*nm::math::clapack_lauum<uint8_t, false>,
1343
- nm::math::clapack_lauum<int8_t, false>,
1344
- nm::math::clapack_lauum<int16_t, false>,
1345
- nm::math::clapack_lauum<uint32_t, false>,
1346
- nm::math::clapack_lauum<uint64_t, false>,*/
1347
- NULL, NULL, NULL, NULL, NULL,
1348
- nm::math::clapack_lauum<false, float>,
1349
- nm::math::clapack_lauum<false, double>,
1350
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1351
- clapack_clauum, clapack_zlauum, // call directly, same function signature!
1352
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1353
- nm::math::clapack_lauum<true, nm::Complex64>,
1354
- nm::math::clapack_lauum<true, nm::Complex128>,
1355
- #endif
1356
- /*
1357
- nm::math::clapack_lauum<nm::Rational32, false>,
1358
- nm::math::clapack_lauum<nm::Rational64, false>,
1359
- nm::math::clapack_lauum<nm::Rational128, false>,
1360
- nm::math::clapack_lauum<nm::RubyObject, false>
1361
-
1362
- */
1363
- };
1364
-
1365
- if (!ttable[NM_DTYPE(a)]) {
1366
- rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
1367
- } else {
1368
- // Call either our version of lauum or the LAPACK version.
1369
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1370
- }
1371
-
1372
- return a;
1373
- }
1374
-
1375
-
1376
928
  /* Call any of the clapack_xgetrf functions as directly as possible.
1377
929
  *
1378
930
  * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
@@ -1402,15 +954,8 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
1402
954
  NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1403
955
  nm::math::clapack_getrf<float>,
1404
956
  nm::math::clapack_getrf<double>,
1405
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1406
- clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
1407
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1408
957
  nm::math::clapack_getrf<nm::Complex64>,
1409
958
  nm::math::clapack_getrf<nm::Complex128>,
1410
- #endif
1411
- nm::math::clapack_getrf<nm::Rational32>,
1412
- nm::math::clapack_getrf<nm::Rational64>,
1413
- nm::math::clapack_getrf<nm::Rational128>,
1414
959
  nm::math::clapack_getrf<nm::RubyObject>
1415
960
  };
1416
961
 
@@ -1438,51 +983,6 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
1438
983
  }
1439
984
 
1440
985
 
1441
- /* Call any of the clapack_xpotrf functions as directly as possible.
1442
- *
1443
- * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
1444
- * with its arguments?
1445
- *
1446
- * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1447
- * handling, so you can easily crash Ruby!
1448
- *
1449
- * Returns an array giving the pivot indices (normally these are argument #5).
1450
- */
1451
- static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1452
- #if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
1453
- rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
1454
- #endif
1455
-
1456
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1457
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1458
- nm::math::clapack_potrf<float>,
1459
- nm::math::clapack_potrf<double>,
1460
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1461
- clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
1462
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1463
- nm::math::clapack_potrf<nm::Complex64>,
1464
- nm::math::clapack_potrf<nm::Complex128>,
1465
- #endif
1466
- NULL, NULL, NULL, NULL /*
1467
- nm::math::clapack_potrf<nm::Rational32>,
1468
- nm::math::clapack_potrf<nm::Rational64>,
1469
- nm::math::clapack_potrf<nm::Rational128>,
1470
- nm::math::clapack_potrf<nm::RubyObject> */
1471
- };
1472
-
1473
- if (!ttable[NM_DTYPE(a)]) {
1474
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1475
- // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1476
- //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1477
- } else {
1478
- // Call either our version of potrf or the LAPACK version.
1479
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1480
- }
1481
-
1482
- return a;
1483
- }
1484
-
1485
-
1486
986
  /*
1487
987
  * Call any of the clapack_xgetrs functions as directly as possible.
1488
988
  */
@@ -1493,15 +993,8 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
1493
993
  NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1494
994
  nm::math::clapack_getrs<float>,
1495
995
  nm::math::clapack_getrs<double>,
1496
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1497
- clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
1498
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1499
996
  nm::math::clapack_getrs<nm::Complex64>,
1500
997
  nm::math::clapack_getrs<nm::Complex128>,
1501
- #endif
1502
- nm::math::clapack_getrs<nm::Rational32>,
1503
- nm::math::clapack_getrs<nm::Rational64>,
1504
- nm::math::clapack_getrs<nm::Rational128>,
1505
998
  nm::math::clapack_getrs<nm::RubyObject>
1506
999
  };
1507
1000
 
@@ -1529,157 +1022,14 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
1529
1022
  return b;
1530
1023
  }
1531
1024
 
1532
-
1533
- /*
1534
- * Call any of the clapack_xpotrs functions as directly as possible.
1535
- */
1536
- static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
1537
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1538
- const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
1539
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1540
- nm::math::clapack_potrs<float,false>,
1541
- nm::math::clapack_potrs<double,false>,
1542
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1543
- clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
1544
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1545
- nm::math::clapack_potrs<nm::Complex64,true>,
1546
- nm::math::clapack_potrs<nm::Complex128,true>,
1547
- #endif
1548
- nm::math::clapack_potrs<nm::Rational32,false>,
1549
- nm::math::clapack_potrs<nm::Rational64,false>,
1550
- nm::math::clapack_potrs<nm::Rational128,false>,
1551
- nm::math::clapack_potrs<nm::RubyObject,false>
1552
- };
1553
-
1554
-
1555
- if (!ttable[NM_DTYPE(a)]) {
1556
- rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1557
- } else {
1558
-
1559
- // Call either our version of potrs or the LAPACK version.
1560
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1561
- NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1562
- }
1563
-
1564
- // b is both returned and modified directly in the argument list.
1565
- return b;
1566
- }
1567
-
1568
-
1569
1025
  /*
1570
1026
  * Simple way to check from within Ruby code if clapack functions are available, without
1571
1027
  * having to wait around for an exception to be thrown.
1572
1028
  */
1573
1029
  static VALUE nm_has_clapack(VALUE self) {
1574
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1575
1030
  return Qfalse;
1576
- #else
1577
- return Qtrue;
1578
- #endif
1579
1031
  }
1580
1032
 
1581
-
1582
- /* Call any of the clapack_xgetri functions as directly as possible.
1583
- *
1584
- * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
1585
- * with its arguments?
1586
- *
1587
- * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1588
- * handling, so you can easily crash Ruby!
1589
- *
1590
- * Returns an array giving the pivot indices (normally these are argument #5).
1591
- */
1592
- static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
1593
- #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1594
- rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1595
- #endif
1596
-
1597
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
1598
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1599
- nm::math::clapack_getri<float>,
1600
- nm::math::clapack_getri<double>,
1601
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1602
- clapack_cgetri, clapack_zgetri, // call directly, same function signature!
1603
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1604
- nm::math::clapack_getri<nm::Complex64>,
1605
- nm::math::clapack_getri<nm::Complex128>,
1606
- #endif
1607
- NULL, NULL, NULL, NULL /*
1608
- nm::math::clapack_getri<nm::Rational32>,
1609
- nm::math::clapack_getri<nm::Rational64>,
1610
- nm::math::clapack_getri<nm::Rational128>,
1611
- nm::math::clapack_getri<nm::RubyObject> */
1612
- };
1613
-
1614
- // Allocate the C version of the pivot index array
1615
- int* ipiv_;
1616
- if (TYPE(ipiv) != T_ARRAY) {
1617
- rb_raise(rb_eArgError, "ipiv must be of type Array");
1618
- } else {
1619
- ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1620
- for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1621
- ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1622
- }
1623
- }
1624
-
1625
- if (!ttable[NM_DTYPE(a)]) {
1626
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1627
- // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
1628
- //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1629
- } else {
1630
- // Call either our version of getri or the LAPACK version.
1631
- ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
1632
- }
1633
-
1634
- return a;
1635
- }
1636
-
1637
-
1638
- /* Call any of the clapack_xpotri functions as directly as possible.
1639
- *
1640
- * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
1641
- * with its arguments?
1642
- *
1643
- * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1644
- * handling, so you can easily crash Ruby!
1645
- *
1646
- * Returns an array giving the pivot indices (normally these are argument #5).
1647
- */
1648
- static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1649
- #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1650
- rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1651
- #endif
1652
-
1653
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1654
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1655
- nm::math::clapack_potri<float>,
1656
- nm::math::clapack_potri<double>,
1657
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1658
- clapack_cpotri, clapack_zpotri, // call directly, same function signature!
1659
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1660
- nm::math::clapack_potri<nm::Complex64>,
1661
- nm::math::clapack_potri<nm::Complex128>,
1662
- #endif
1663
- NULL, NULL, NULL, NULL /*
1664
- nm::math::clapack_getri<nm::Rational32>,
1665
- nm::math::clapack_getri<nm::Rational64>,
1666
- nm::math::clapack_getri<nm::Rational128>,
1667
- nm::math::clapack_getri<nm::RubyObject> */
1668
- };
1669
-
1670
- if (!ttable[NM_DTYPE(a)]) {
1671
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1672
- // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1673
- //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1674
- } else {
1675
- // Call either our version of getri or the LAPACK version.
1676
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1677
- }
1678
-
1679
- return a;
1680
- }
1681
-
1682
-
1683
1033
  /*
1684
1034
  * Call any of the clapack_xlaswp functions as directly as possible.
1685
1035
  *
@@ -1695,15 +1045,8 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
1695
1045
  nm::math::clapack_laswp<int64_t>,
1696
1046
  nm::math::clapack_laswp<float>,
1697
1047
  nm::math::clapack_laswp<double>,
1698
- //#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
1699
- // clapack_claswp, clapack_zlaswp, // call directly, same function signature!
1700
- //#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1701
1048
  nm::math::clapack_laswp<nm::Complex64>,
1702
1049
  nm::math::clapack_laswp<nm::Complex128>,
1703
- //#endif
1704
- nm::math::clapack_laswp<nm::Rational32>,
1705
- nm::math::clapack_laswp<nm::Rational64>,
1706
- nm::math::clapack_laswp<nm::Rational128>,
1707
1050
  nm::math::clapack_laswp<nm::RubyObject>
1708
1051
  };
1709
1052
 
@@ -1735,6 +1078,20 @@ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dty
1735
1078
  ttable[dtype](M, elements, lda, result);
1736
1079
  }
1737
1080
 
1081
+ /*
1082
+ * C accessor for reducing a matrix to hessenberg form.
1083
+ */
1084
+ void nm_math_hessenberg(VALUE a) {
1085
+ static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
1086
+ NULL, NULL, NULL, NULL, NULL, // does not support ints
1087
+ nm::math::hessenberg<float>,
1088
+ nm::math::hessenberg<double>,
1089
+ NULL, NULL, // does not support Complex
1090
+ NULL // no support for Ruby Object
1091
+ };
1092
+
1093
+ ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
1094
+ }
1738
1095
  /*
1739
1096
  * C accessor for calculating an in-place inverse.
1740
1097
  */