nmatrix 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nmatrix/data/complex.h +20 -55
  3. data/ext/nmatrix/data/data.cpp +11 -44
  4. data/ext/nmatrix/data/data.h +174 -311
  5. data/ext/nmatrix/data/meta.h +1 -7
  6. data/ext/nmatrix/data/ruby_object.h +3 -85
  7. data/ext/nmatrix/extconf.rb +2 -73
  8. data/ext/nmatrix/math.cpp +170 -813
  9. data/ext/nmatrix/math/asum.h +2 -25
  10. data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
  11. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  12. data/ext/nmatrix/math/gemm.h +2 -32
  13. data/ext/nmatrix/math/gemv.h +1 -35
  14. data/ext/nmatrix/math/getrf.h +21 -6
  15. data/ext/nmatrix/math/getrs.h +0 -8
  16. data/ext/nmatrix/math/imax.h +0 -22
  17. data/ext/nmatrix/math/long_dtype.h +0 -3
  18. data/ext/nmatrix/math/math.h +11 -337
  19. data/ext/nmatrix/math/nrm2.h +2 -23
  20. data/ext/nmatrix/math/rot.h +1 -25
  21. data/ext/nmatrix/math/rotg.h +4 -13
  22. data/ext/nmatrix/math/scal.h +0 -22
  23. data/ext/nmatrix/math/trsm.h +0 -55
  24. data/ext/nmatrix/math/util.h +148 -0
  25. data/ext/nmatrix/nmatrix.cpp +0 -14
  26. data/ext/nmatrix/nmatrix.h +92 -84
  27. data/ext/nmatrix/ruby_constants.cpp +0 -2
  28. data/ext/nmatrix/ruby_constants.h +0 -2
  29. data/ext/nmatrix/ruby_nmatrix.c +86 -45
  30. data/ext/nmatrix/storage/dense/dense.cpp +1 -7
  31. data/ext/nmatrix/storage/storage.h +0 -1
  32. data/ext/nmatrix/ttable_helper.rb +0 -6
  33. data/ext/nmatrix/util/io.cpp +1 -1
  34. data/lib/nmatrix.rb +1 -19
  35. data/lib/nmatrix/blas.rb +33 -11
  36. data/lib/nmatrix/io/market.rb +3 -3
  37. data/lib/nmatrix/lapack_core.rb +181 -0
  38. data/lib/nmatrix/lapack_plugin.rb +44 -0
  39. data/lib/nmatrix/math.rb +382 -131
  40. data/lib/nmatrix/monkeys.rb +2 -3
  41. data/lib/nmatrix/nmatrix.rb +166 -13
  42. data/lib/nmatrix/shortcuts.rb +72 -7
  43. data/lib/nmatrix/version.rb +2 -2
  44. data/spec/00_nmatrix_spec.rb +154 -5
  45. data/spec/02_slice_spec.rb +2 -6
  46. data/spec/03_nmatrix_monkeys_spec.rb +7 -1
  47. data/spec/blas_spec.rb +60 -33
  48. data/spec/homogeneous_spec.rb +10 -10
  49. data/spec/lapack_core_spec.rb +482 -0
  50. data/spec/math_spec.rb +436 -52
  51. data/spec/shortcuts_spec.rb +28 -4
  52. data/spec/spec_helper.rb +14 -2
  53. data/spec/utm5940.mtx +83844 -0
  54. metadata +49 -76
  55. data/.gitignore +0 -27
  56. data/.rspec +0 -2
  57. data/.travis.yml +0 -15
  58. data/CONTRIBUTING.md +0 -82
  59. data/Gemfile +0 -2
  60. data/History.txt +0 -677
  61. data/LICENSE.txt +0 -23
  62. data/Manifest.txt +0 -92
  63. data/README.rdoc +0 -150
  64. data/Rakefile +0 -216
  65. data/ext/nmatrix/data/rational.h +0 -440
  66. data/ext/nmatrix/math/geev.h +0 -82
  67. data/ext/nmatrix/math/ger.h +0 -96
  68. data/ext/nmatrix/math/gesdd.h +0 -80
  69. data/ext/nmatrix/math/gesvd.h +0 -78
  70. data/ext/nmatrix/math/getf2.h +0 -86
  71. data/ext/nmatrix/math/getri.h +0 -108
  72. data/ext/nmatrix/math/potrs.h +0 -129
  73. data/ext/nmatrix/math/swap.h +0 -52
  74. data/lib/nmatrix/lapack.rb +0 -240
  75. data/nmatrix.gemspec +0 -55
  76. data/scripts/mac-brew-gcc.sh +0 -50
  77. data/scripts/mac-mavericks-brew-gcc.sh +0 -22
  78. data/spec/lapack_spec.rb +0 -459
@@ -44,9 +44,6 @@ namespace nm {
44
44
  template <> struct ctype_to_dtype_enum<double> { static const nm::dtype_t value_type = nm::FLOAT64; };
45
45
  template <> struct ctype_to_dtype_enum<Complex64> { static const nm::dtype_t value_type = nm::COMPLEX64; };
46
46
  template <> struct ctype_to_dtype_enum<Complex128> { static const nm::dtype_t value_type = nm::COMPLEX128; };
47
- template <> struct ctype_to_dtype_enum<Rational32> { static const nm::dtype_t value_type = nm::RATIONAL32; };
48
- template <> struct ctype_to_dtype_enum<Rational64> { static const nm::dtype_t value_type = nm::RATIONAL64; };
49
- template <> struct ctype_to_dtype_enum<Rational128> { static const nm::dtype_t value_type = nm::RATIONAL128; };
50
47
  template <> struct ctype_to_dtype_enum<RubyObject> { static const nm::dtype_t value_type = nm::RUBYOBJ; };
51
48
 
52
49
 
@@ -60,11 +57,8 @@ namespace nm {
60
57
  template <> struct dtype_enum_T<nm::FLOAT64> { typedef double type; };
61
58
  template <> struct dtype_enum_T<nm::COMPLEX64> { typedef nm::Complex64 type; };
62
59
  template <> struct dtype_enum_T<nm::COMPLEX128> { typedef nm::Complex128 type; };
63
- template <> struct dtype_enum_T<nm::RATIONAL32> { typedef nm::Rational32 type; };
64
- template <> struct dtype_enum_T<nm::RATIONAL64> { typedef nm::Rational64 type; };
65
- template <> struct dtype_enum_T<nm::RATIONAL128> { typedef nm::Rational128 type; };
66
60
  template <> struct dtype_enum_T<nm::RUBYOBJ> { typedef nm::RubyObject type; };
67
61
 
68
62
  } // end namespace nm
69
63
 
70
- #endif
64
+ #endif
@@ -45,7 +45,7 @@
45
45
  /*
46
46
  * Macros
47
47
  */
48
- #define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX) or (TYPE(val) == T_RATIONAL))
48
+ #define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or (TYPE(val) == T_FLOAT) or (TYPE(val) == T_COMPLEX))
49
49
  #define NMATRIX_CHECK_TYPE(val) \
50
50
  if (TYPE(val) != T_DATA || (RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(val)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) \
51
51
  rb_raise(rb_eTypeError, "Expected NMatrix on left-hand side of operation.");
@@ -76,12 +76,6 @@ class RubyObject {
76
76
  template <typename FloatType, typename = typename std::enable_if<std::is_floating_point<FloatType>::value>::type>
77
77
  inline RubyObject(const Complex<FloatType>& other) : rval(rb_complex_new(rb_float_new(other.r), rb_float_new(other.i))) {}
78
78
 
79
- /*
80
- * Rational number constructor.
81
- */
82
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
83
- inline RubyObject(const Rational<IntType>& other) : rval(rb_rational_new(INT2FIX(other.n), INT2FIX(other.d))) {}
84
-
85
79
  /*
86
80
  * Integer constructor.
87
81
  *
@@ -123,10 +117,6 @@ class RubyObject {
123
117
  inline operator double() const { RETURN_OBJ2NUM(NUM2DBL) }
124
118
  inline operator float() const { RETURN_OBJ2NUM(NUM2DBL) }
125
119
 
126
- inline operator Rational32() const { return this->to<Rational32>(); }
127
- inline operator Rational64() const { return this->to<Rational64>(); }
128
- inline operator Rational128() const { return this->to<Rational128>(); }
129
-
130
120
  inline operator Complex64() const { return this->to<Complex64>(); }
131
121
  inline operator Complex128() const { return this->to<Complex128>(); }
132
122
  /*
@@ -255,20 +245,6 @@ class RubyObject {
255
245
  return *this != RubyObject(other);
256
246
  }
257
247
  */
258
- //////////////////////////////
259
- // RUBY-RATIONAL OPERATIONS //
260
- //////////////////////////////
261
-
262
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
263
- inline bool operator==(const Rational<IntType>& other) const {
264
- return *this == RubyObject(other);
265
- }
266
-
267
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
268
- inline bool operator!=(const Rational<IntType>& other) const {
269
- return *this != RubyObject(other);
270
- }
271
-
272
248
  //////////////////////////////
273
249
  // RUBY-COMPLEX OPERATIONS //
274
250
  //////////////////////////////
@@ -304,7 +280,7 @@ class RubyObject {
304
280
  */
305
281
  template <typename ComplexType>
306
282
  inline typename std::enable_if<made_from_same_template<ComplexType, Complex64>::value, ComplexType>::type to(void) const {
307
- if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT or TYPE(this->rval) == T_RATIONAL) {
283
+ if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT) {
308
284
  return ComplexType(NUM2DBL(this->rval));
309
285
 
310
286
  } else if (TYPE(this->rval) == T_COMPLEX) {
@@ -314,25 +290,8 @@ class RubyObject {
314
290
  rb_raise(rb_eTypeError, "Invalid conversion to Complex type.");
315
291
  }
316
292
  }
317
-
318
- /*
319
- * Convert a Ruby object to a rational number.
320
- */
321
- template <typename RationalType>
322
- inline typename std::enable_if<made_from_same_template<RationalType, Rational32>::value, RationalType>::type to(void) const {
323
- if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT or TYPE(this->rval) == T_COMPLEX) {
324
- return RationalType(NUM2INT(this->rval));
325
-
326
- } else if (TYPE(this->rval) == T_RATIONAL) {
327
- return RationalType(NUM2INT(rb_funcall(this->rval, nm_rb_numer, 0)), NUM2INT(rb_funcall(this->rval, nm_rb_denom, 0)));
328
-
329
- } else {
330
- rb_raise(rb_eTypeError, "Invalid conversion to Rational type.");
331
- }
332
- }
333
-
334
293
  };
335
-
294
+
336
295
  // Negative operator
337
296
  inline RubyObject operator-(const RubyObject& rhs) {
338
297
  return RubyObject(rb_funcall(rhs.rval, nm_rb_negate, 0));
@@ -413,47 +372,6 @@ inline bool operator>(const Complex<FloatType>& left, const RubyObject& right) {
413
372
  return RubyObject(left) > right;
414
373
  }
415
374
 
416
-
417
-
418
- //////////////////////////////
419
- // RATIONAL-RUBY OPERATIONS //
420
- //////////////////////////////
421
-
422
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
423
- inline bool operator==(const Rational<IntType>& left, const RubyObject& right) {
424
- return RubyObject(left) == right;
425
- }
426
-
427
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
428
- inline bool operator!=(const Rational<IntType>& left, const RubyObject& right) {
429
- return RubyObject(left) != right;
430
- }
431
-
432
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
433
- inline bool operator>=(const Rational<IntType>& left, const RubyObject& right) {
434
- return RubyObject(left) >= right;
435
- }
436
-
437
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
438
- inline bool operator<=(const Rational<IntType>& left, const RubyObject& right) {
439
- return RubyObject(left) <= right;
440
- }
441
-
442
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
443
- inline bool operator<(const Rational<IntType>& left, const RubyObject& right) {
444
- return RubyObject(left) < right;
445
- }
446
-
447
- template <typename IntType, typename = typename std::enable_if<std::is_integral<IntType>::value>::type>
448
- inline bool operator>(const Rational<IntType>& left, const RubyObject& right) {
449
- return RubyObject(left) > right;
450
- }
451
-
452
- inline std::ostream& operator<<(std::ostream& out, const RubyObject& rhs) {
453
- out << "RUBYOBJECT" << std::flush; // FIXME: Try calling inspect or something on the Ruby object if we really need to debug it.
454
- return out;
455
- }
456
-
457
375
  } // end of namespace nm
458
376
 
459
377
  namespace std {
@@ -155,83 +155,12 @@ else
155
155
  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
156
156
  end
157
157
 
158
- # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
159
-
160
- # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
161
- # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
162
- # for ATLAS. The same for all the others
163
- #
164
- #dir_config("clapack", ["/usr/local/atlas/include"], [])
165
- #
166
- #
167
-
168
- # Is g++ having trouble finding your header files?
169
- # Try this:
170
- # export C_INCLUDE_PATH=/usr/local/atlas/include
171
- # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
172
- # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
173
-
174
- idefaults = {lapack: ["/usr/include/atlas"],
175
- cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
176
- atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
177
-
178
- # For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
179
- # exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
180
- # that Dir.exists?(d) for each.
181
- ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
182
- cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
183
- atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
184
-
185
- if have_library("clapack") # Usually only applies for Mac OS X
186
- $libs += " -lclapack "
187
- end
188
-
189
- unless have_library("lapack")
190
- dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
191
- end
192
-
193
- unless have_library("cblas")
194
- dir_config("cblas", idefaults[:cblas], ldefaults[:cblas])
195
- end
196
-
197
- unless have_library("atlas")
198
- dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
199
- end
200
-
201
- # If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
202
- # we try our luck with the default location.
203
- if have_header("atlas/cblas.h")
204
- have_header("atlas/clapack.h")
205
- else
206
- have_header("cblas.h")
207
- have_header("clapack.h")
208
- end
209
-
210
-
211
- # Although have_func is supposed to take a list as its second argument, I find that it simply
212
- # applies a :to_s to the second arg and doesn't actually check each one. We may want to put
213
- # have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and
214
- # lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. --@mohawkjohn 8/20/14
215
- have_func("clapack_dgetrf", "atlas/clapack.h")
216
- have_func("clapack_dgetri", "atlas/clapack.h")
217
- have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
218
-
219
- have_func("cblas_dgemm", "cblas.h")
220
-
221
- #have_func("rb_scan_args", "ruby.h")
222
-
223
- #find_library("lapack", "clapack_dgetrf")
224
- #find_library("cblas", "cblas_dgemm")
225
- #find_library("atlas", "ATL_dgemmNN")
226
- # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
227
- $libs += " -llapack -lcblas -latlas "
228
158
  #$libs += " -lprofiler "
229
159
 
230
-
231
160
  # For release, these next two should both be changed to -O3.
232
- $CFLAGS += " -O3" #" -O0 -g "
161
+ $CFLAGS += " -O3 "
233
162
  #$CFLAGS += " -static -O0 -g "
234
- $CPPFLAGS += " -O3 -std=#{$CPP_STANDARD}" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
163
+ $CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
235
164
  #$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
236
165
 
237
166
  CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
@@ -23,90 +23,103 @@
23
23
  //
24
24
  // == math.cpp
25
25
  //
26
- // Ruby-exposed BLAS functions.
26
+ // Ruby-exposed CBLAS and LAPACK functions that are available without
27
+ // an external library.
27
28
  //
28
- // === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
29
+ // === Procedure for adding CBLAS functions to math.cpp/math.h:
29
30
  //
30
31
  // This procedure is written as if for a fictional function with double
31
- // version dbacon, which we'll say is from LAPACK.
32
+ // version dbacon, which we'll say is from CBLAS.
32
33
  //
33
34
  // 1. Write a default templated version which probably returns a boolean.
34
35
  // Call it bacon, and put it in math.h.
35
36
  //
36
- // Order will always be row-major, so we don't need to pass that.
37
- // CBLAS_TRANSPOSE-type arguments, however, should be passed.
38
- //
39
- // Otherwise, arguments should look like those in cblas.h or clapack.h:
40
- //
41
37
  // template <typename DType>
42
38
  // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
43
39
  // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
44
40
  // }
45
41
  //
42
+ // Make sure this is in namespace nm::math
43
+ //
46
44
  // 2. In math.cpp, add a templated inline static version of the function which takes
47
- // only void* pointers and uses reinterpret_cast to convert them to the
48
- // proper dtype.
45
+ // only void* pointers and uses static_cast to convert them to the
46
+ // proper dtype. This should also be in namespace nm::math
49
47
  //
50
48
  // This function may also need to switch m and n if these arguments are given.
51
49
  //
52
50
  // For an example, see cblas_gemm. This function should do nothing other than cast
53
- // appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
51
+ // appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
54
52
  // all take void* only, and no other pointers that vary between functions, you can skip
55
53
  // this particular step -- as we can call them directly using a custom function pointer
56
54
  // array (same function signature!).
57
55
  //
58
- // This version of the function will be the one exposed through NMatrix::LAPACK. We
59
- // want it to be as close to the actual LAPACK version of the function as possible,
56
+ // This version of the function will be the one exposed through NMatrix::BLAS. We
57
+ // want it to be as close to the actual BLAS version of the function as possible,
60
58
  // and with as few checks as possible.
61
59
  //
62
60
  // You will probably need a forward declaration in the extern "C" block.
63
61
  //
64
62
  // Note: In that case, the function you wrote in Step 1 should also take exactly the
65
- // same arguments as clapack_xbacon. Otherwise Bad Things will happen.
63
+ // same arguments as cblas_xbacon. Otherwise Bad Things will happen.
66
64
  //
67
- // 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
65
+ // 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
66
+ // call nm::math::bacon()) and also
67
+ // inline specialized versions of bacon for the different BLAS types.
68
+ // This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
69
+ // of bacon from whatever external library is available, as well as the internal version
70
+ // if an external version is not available. These functions will end up in a namespace
71
+ // like nm::math::atlas, but don't explicitly put them in a namespace, they will get
72
+ // put in the appropriate namespace when cblas_templates_core.h is included.
68
73
  //
69
- // You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
70
- // Or you may prefer to do it by hand:
74
+ // template <typename DType>
75
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
76
+ // return nm::math::bacon(trans, M, N, A, ...);
77
+ // }
71
78
  //
72
79
  // template <>
73
80
  // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
74
- // clapack_sbacon(trans, M, N, A, ...);
81
+ // cblas_sbacon(trans, M, N, A, ...);
75
82
  // return true;
76
83
  // }
77
84
  //
78
- // Make sure these functions are in the namespace nm::math.
79
- //
80
85
  // Note that you should do everything in your power here to parse any return values
81
- // clapack_sbacon may give you. We're not trying very hard in this example, but you might
86
+ // cblas_sbacon may give you. We're not trying very hard in this example, but you might
82
87
  // look at getrf to see how it might be done.
83
88
  //
84
- // 4. Expose the function in nm_math_init_blas(), in math.cpp:
89
+ // 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
90
+ // of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
91
+ // and math_lapacke.cpp
85
92
  //
86
- // rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
93
+ // 5. Expose the function in nm_math_init_blas(), in math.cpp:
87
94
  //
88
- // Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
95
+ // rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
89
96
  //
90
- // 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
97
+ // Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
98
+ // to the plugin gems.
99
+ //
100
+ // Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
101
+ //
102
+ // 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
91
103
  // sanity checks we left out in step 2.
92
104
  //
93
- // 6. Write tests for NMatrix::LAPACK::getrf, confirming that it works for the ATLAS dtypes.
105
+ // 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
94
106
  //
95
- // 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
107
+ // 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
96
108
  // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
97
109
  // inserts a lot of unnecessary stuff.
98
110
  //
99
111
  // Copy and paste the output into the default templated function you wrote in Step 1.
100
112
  // Fix it so it works as a template instead of just for doubles.
101
113
  //
102
- // 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
114
+ // Because of step 3, this will automatically also work for the nmatrix-atlas
115
+ // and nmatrix-lapacke implementations.
116
+ //
117
+ // 9. Write tests to confirm that it works for all data types.
103
118
  //
104
- // 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
119
+ // 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
105
120
  // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
106
121
  //
107
- // 10. Pull request!
108
-
109
-
122
+ // 11. Pull request!
110
123
 
111
124
  /*
112
125
  * Project Includes
@@ -117,30 +130,23 @@
117
130
  #include <limits>
118
131
  #include <cmath>
119
132
 
120
- #include "math/inc.h"
133
+ #include "math/cblas_enums.h"
134
+
121
135
  #include "data/data.h"
122
- #include "math/gesdd.h"
123
- #include "math/gesvd.h"
124
- #include "math/geev.h"
125
- #include "math/swap.h"
126
136
  #include "math/imax.h"
127
137
  #include "math/scal.h"
128
- #include "math/ger.h"
129
- #include "math/getf2.h"
130
138
  #include "math/laswp.h"
131
139
  #include "math/trsm.h"
132
- #include "math/long_dtype.h" // for gemm.h
133
140
  #include "math/gemm.h"
134
141
  #include "math/gemv.h"
135
142
  #include "math/asum.h"
136
143
  #include "math/nrm2.h"
137
144
  #include "math/getrf.h"
138
- #include "math/getri.h"
139
145
  #include "math/getrs.h"
140
- #include "math/potrs.h"
141
146
  #include "math/rot.h"
142
147
  #include "math/rotg.h"
143
148
  #include "math/math.h"
149
+ #include "math/util.h"
144
150
  #include "storage/dense/dense.h"
145
151
 
146
152
  #include "nmatrix.h"
@@ -151,12 +157,6 @@
151
157
  */
152
158
 
153
159
  extern "C" {
154
- #if defined HAVE_CLAPACK_H
155
- #include <clapack.h>
156
- #elif defined HAVE_ATLAS_CLAPACK_H
157
- #include <atlas/clapack.h>
158
- #endif
159
-
160
160
  /* BLAS Level 1. */
161
161
  static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
162
162
  static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
@@ -166,34 +166,20 @@ extern "C" {
166
166
  static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
167
167
 
168
168
  /* BLAS Level 2. */
169
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
170
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
171
+
169
172
  /* BLAS Level 3. */
170
173
  static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
171
174
  VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
172
- static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
173
- VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
174
175
  static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
175
176
  VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
176
- static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
177
- VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
178
- static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
179
- VALUE lda, VALUE beta, VALUE c, VALUE ldc);
180
- static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
181
- VALUE lda, VALUE beta, VALUE c, VALUE ldc);
182
177
 
183
178
  /* LAPACK. */
184
179
  static VALUE nm_has_clapack(VALUE self);
185
180
  static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
186
- static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
187
181
  static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
188
- static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
189
- static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
190
- static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
191
182
  static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
192
- static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
193
-
194
- static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
195
- static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
196
- static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
197
183
  } // end of extern "C" block
198
184
 
199
185
  ////////////////////
@@ -230,6 +216,11 @@ namespace nm {
230
216
  }
231
217
  }
232
218
 
219
+ // det_exact cannot be used on byte (uint8_t): the result would have to be returned as an unsigned byte, but a determinant can be negative even if all elements of the matrix are positive.
220
+ template <>
221
+ void det_exact<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
222
+ rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
223
+ }
233
224
 
234
225
  /*
235
226
  * Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
@@ -308,6 +299,80 @@ namespace nm {
308
299
  delete[] col_index;
309
300
  }
310
301
 
302
+ /*
303
+ * Reduce a square matrix to Hessenberg form with Householder transforms.
304
+ *
305
+ * == Arguments
306
+ *
307
+ * nrows - The number of rows present in matrix a.
308
+ * a_elements - Elements of the matrix to be reduced in 1D array form.
309
+ *
310
+ * == References
311
+ *
312
+ * http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
313
+ * This code has been included by permission of the author.
314
+ */
315
+ template <typename DType>
316
+ void hessenberg(const int nrows, void* a_elements) {
317
+ DType* a = reinterpret_cast<DType*>(a_elements);
318
+ DType* u = new DType[nrows]; // auxiliary storage for the chosen vector
319
+ DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
320
+ int i, k, col;
321
+
322
+ // For each column use a Householder transformation to zero all entries
323
+ // below the subdiagonal.
324
+ for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
325
+ col++) {
326
+ // Calculate the signed square root of the sum of squares of the
327
+ // elements below the diagonal.
328
+
329
+ for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
330
+ p_a += nrows, i++) {
331
+ sum_of_squares += *p_a * *p_a;
332
+ }
333
+ if (sum_of_squares == 0.0) { continue; }
334
+ sum_of_squares = std::sqrt(sum_of_squares);
335
+
336
+ if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }
337
+
338
+ // Calculate the Householder transformation Q = I - 2uu'/u'u.
339
+ u[col + 1] = *psubdiag - sum_of_squares;
340
+ *psubdiag = sum_of_squares;
341
+
342
+ for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
343
+ u[i] = *p_a;
344
+ *p_a = 0.0;
345
+ }
346
+
347
+ // Premultiply A by Q
348
+ scale = -1.0 / (sum_of_squares * u[col+1]);
349
+ for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
350
+ p_a = a + nrows * (col + 1) + i;
351
+ for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
352
+ innerproduct += u[k] * *p_a;
353
+ }
354
+ innerproduct *= scale;
355
+ for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
356
+ *p_a -= u[k] * innerproduct;
357
+ }
358
+ }
359
+
360
+ // Postmultiply QA by Q
361
+ for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
362
+ for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
363
+ innerproduct += u[k] * *(p_row + k);
364
+ }
365
+ innerproduct *= scale;
366
+
367
+ for (k = col + 1; k < nrows; k++) {
368
+ *(p_row + k) -= u[k] * innerproduct;
369
+ }
370
+ }
371
+ }
372
+
373
+ delete[] u;
374
+ }
375
+
311
376
  /*
312
377
  * Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
313
378
  */
@@ -318,6 +383,10 @@ namespace nm {
318
383
 
319
384
  if (M == 2) {
320
385
  DType det = A[0] * A[lda+1] - A[1] * A[lda];
386
+ if (det == 0) {
387
+ rb_raise(nm_eNotInvertibleError,
388
+ "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
389
+ }
321
390
  B[0] = A[lda+1] / det;
322
391
  B[1] = -A[1] / det;
323
392
  B[ldb] = -A[lda] / det;
@@ -328,7 +397,8 @@ namespace nm {
328
397
  DType det;
329
398
  det_exact<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
330
399
  if (det == 0) {
331
- rb_raise(nm_eNotInvertibleError, "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
400
+ rb_raise(nm_eNotInvertibleError,
401
+ "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
332
402
  }
333
403
 
334
404
  B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
@@ -347,22 +417,6 @@ namespace nm {
347
417
  }
348
418
  }
349
419
 
350
- /*
351
- * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
352
- */
353
- template <typename DType, typename CType>
354
- inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
355
- return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
356
- }
357
-
358
- /*
359
- * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
360
- */
361
- template <typename DType, typename CType>
362
- inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
363
- return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
364
- }
365
-
366
420
  /*
367
421
  * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
368
422
  *
@@ -422,39 +476,6 @@ namespace nm {
422
476
  reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
423
477
  }
424
478
 
425
-
426
- /*
427
- * Function signature conversion for calling CBLAS' trmm functions as directly as possible.
428
- *
429
- * For documentation: http://www.netlib.org/blas/dtrmm.f
430
- */
431
- template <typename DType>
432
- inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
433
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
434
- const void* A, const int lda, void* B, const int ldb)
435
- {
436
- trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
437
- reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
438
- }
439
-
440
-
441
- /*
442
- * Function signature conversion for calling CBLAS' syrk functions as directly as possible.
443
- *
444
- * For documentation: http://www.netlib.org/blas/dsyrk.f
445
- */
446
- template <typename DType>
447
- inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
448
- const int n, const int k, const void* alpha,
449
- const void* A, const int lda, const void* beta, void* C, const int ldc)
450
- {
451
- syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
452
- reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
453
- }
454
-
455
-
456
-
457
-
458
479
  }
459
480
  } // end of namespace nm::math
460
481
 
@@ -466,80 +487,29 @@ extern "C" {
466
487
  ///////////////////
467
488
 
468
489
  void nm_math_init_blas() {
469
- cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
490
+ VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");
470
491
 
471
492
  rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);
472
493
 
473
- /* ATLAS-CLAPACK Functions */
474
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
475
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
476
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
477
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
478
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
479
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
480
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
481
- rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
482
-
483
- /* Non-ATLAS regular LAPACK Functions called via Fortran interface */
484
- rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
485
- rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
486
- rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
487
-
488
- cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
489
-
490
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
491
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
492
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
493
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
494
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
495
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
496
-
497
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
498
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
499
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
500
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
501
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
502
- rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
503
- }
504
-
505
- /*
506
- * Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
507
- *
508
- * Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
509
- */
510
- static inline char lapack_svd_job_sym(VALUE op) {
511
- if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
512
- else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
513
- else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
514
- else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
515
- else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
516
- return 'a';
517
- }
494
+ VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");
518
495
 
496
+ /* ATLAS-CLAPACK Functions that are implemented internally */
497
+ rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
498
+ rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
499
+ rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
519
500
 
520
- /*
521
- * Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
522
- *
523
- * Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
524
- */
525
- static inline char lapack_evd_job_sym(VALUE op) {
526
- if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
527
- else return 'V';
528
- }
501
+ VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");
529
502
 
503
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
504
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
505
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
506
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
507
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
508
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
530
509
 
531
- /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
532
- * into an enum recognized by cblas.
533
- *
534
- * Called by nm_cblas_gemm -- basically inline.
535
- *
536
- */
537
- static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
538
- if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
539
- else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
540
- else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
541
- else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
542
- return CblasNoTrans;
510
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
511
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
512
+ rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
543
513
  }
544
514
 
545
515
  /*
@@ -572,55 +542,6 @@ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE
572
542
  return vector;
573
543
  }
574
544
 
575
- /*
576
- * Interprets cblas argument which could be :left or :right
577
- *
578
- * Called by nm_cblas_trsm -- basically inline
579
- */
580
- static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
581
- ID op_id = rb_to_id(op);
582
- if (op_id == nm_rb_left) return CblasLeft;
583
- if (op_id == nm_rb_right) return CblasRight;
584
- rb_raise(rb_eArgError, "Expected :left or :right for side argument");
585
- return CblasLeft;
586
- }
587
-
588
- /*
589
- * Interprets cblas argument which could be :upper or :lower
590
- *
591
- * Called by nm_cblas_trsm -- basically inline
592
- */
593
- static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
594
- ID op_id = rb_to_id(op);
595
- if (op_id == nm_rb_upper) return CblasUpper;
596
- if (op_id == nm_rb_lower) return CblasLower;
597
- rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
598
- return CblasUpper;
599
- }
600
-
601
-
602
- /*
603
- * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
604
- *
605
- * Called by nm_cblas_trsm -- basically inline
606
- */
607
- static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
608
- if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
609
- return CblasNonUnit;
610
- }
611
-
612
- /*
613
- * Interprets cblas argument which could be :row or :col
614
- */
615
- static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
616
- if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
617
- else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
618
- rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
619
- rb_raise(rb_eArgError, "Expected :row or :col for order argument");
620
- return CblasRowMajor;
621
- }
622
-
623
-
624
545
  /*
625
546
  * Call any of the cblas_xrotg functions as directly as possible.
626
547
  *
@@ -641,10 +562,6 @@ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
641
562
  * The outputs [c,s] will be returned in a Ruby Array at the end; the input
642
563
  * NMatrix will also be modified in-place.
643
564
  *
644
- * If you provide rationals, be aware that there's a high probability of an
645
- * error, since rotg includes a square root -- and most rationals' square roots
646
- * are irrational. You're better off converting to Float first.
647
- *
648
565
  * This function, like the other cblas_ functions, does minimal type-checking.
649
566
  */
650
567
  static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
@@ -654,14 +571,13 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
654
571
  nm::math::cblas_rotg<double>,
655
572
  nm::math::cblas_rotg<nm::Complex64>,
656
573
  nm::math::cblas_rotg<nm::Complex128>,
657
- NULL, NULL, NULL, // no rationals
658
574
  NULL //nm::math::cblas_rotg<nm::RubyObject>
659
575
  };
660
576
 
661
577
  nm::dtype_t dtype = NM_DTYPE(ab);
662
578
 
663
579
  if (!ttable[dtype]) {
664
- rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
580
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
665
581
  return Qnil;
666
582
 
667
583
  } else {
@@ -723,9 +639,6 @@ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VAL
723
639
  nm::math::cblas_rot<double,double>,
724
640
  nm::math::cblas_rot<nm::Complex64,float>,
725
641
  nm::math::cblas_rot<nm::Complex128,double>,
726
- nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
727
- nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
728
- nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
729
642
  nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
730
643
  };
731
644
 
@@ -792,17 +705,13 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
792
705
  nm::math::cblas_nrm2<float64_t,float64_t>,
793
706
  nm::math::cblas_nrm2<float32_t,nm::Complex64>,
794
707
  nm::math::cblas_nrm2<float64_t,nm::Complex128>,
795
- //nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
796
- //nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
797
- //nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
798
- NULL, NULL, NULL,
799
708
  nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
800
709
  };
801
710
 
802
711
  nm::dtype_t dtype = NM_DTYPE(x);
803
712
 
804
713
  if (!ttable[dtype]) {
805
- rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
714
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
806
715
  return Qnil;
807
716
 
808
717
  } else {
@@ -850,9 +759,6 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
850
759
  nm::math::cblas_asum<float64_t,float64_t>,
851
760
  nm::math::cblas_asum<float32_t,nm::Complex64>,
852
761
  nm::math::cblas_asum<float64_t,nm::Complex128>,
853
- nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
854
- nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
855
- nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
856
762
  nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
857
763
  };
858
764
 
@@ -1000,10 +906,8 @@ static VALUE nm_cblas_trsm(VALUE self,
1000
906
  NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1001
907
  nm::math::cblas_trsm<float>,
1002
908
  nm::math::cblas_trsm<double>,
1003
- cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
1004
- nm::math::cblas_trsm<nm::Rational32>,
1005
- nm::math::cblas_trsm<nm::Rational64>,
1006
- nm::math::cblas_trsm<nm::Rational128>,
909
+ nm::math::cblas_trsm<nm::Complex64>,
910
+ nm::math::cblas_trsm<nm::Complex128>,
1007
911
  nm::math::cblas_trsm<nm::RubyObject>
1008
912
  };
1009
913
 
@@ -1021,358 +925,6 @@ static VALUE nm_cblas_trsm(VALUE self,
1021
925
  return Qtrue;
1022
926
  }
1023
927
 
1024
-
1025
- static VALUE nm_cblas_trmm(VALUE self,
1026
- VALUE order,
1027
- VALUE side, VALUE uplo,
1028
- VALUE trans_a, VALUE diag,
1029
- VALUE m, VALUE n,
1030
- VALUE alpha,
1031
- VALUE a, VALUE lda,
1032
- VALUE b, VALUE ldb)
1033
- {
1034
- static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
1035
- const enum CBLAS_SIDE, const enum CBLAS_UPLO,
1036
- const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
1037
- const int m, const int n, const void* alpha, const void* a,
1038
- const int lda, void* b, const int ldb) = {
1039
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1040
- nm::math::cblas_trmm<float>,
1041
- nm::math::cblas_trmm<double>,
1042
- cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
1043
- /*
1044
- nm::math::cblas_trmm<nm::Rational32>,
1045
- nm::math::cblas_trmm<nm::Rational64>,
1046
- nm::math::cblas_trmm<nm::Rational128>,
1047
- nm::math::cblas_trmm<nm::RubyObject>*/
1048
- };
1049
-
1050
- nm::dtype_t dtype = NM_DTYPE(a);
1051
-
1052
- if (!ttable[dtype]) {
1053
- rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
1054
- } else {
1055
- void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1056
- rubyval_to_cval(alpha, dtype, pAlpha);
1057
-
1058
- ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1059
- }
1060
-
1061
- return b;
1062
- }
1063
-
1064
-
1065
- static VALUE nm_cblas_syrk(VALUE self,
1066
- VALUE order,
1067
- VALUE uplo,
1068
- VALUE trans,
1069
- VALUE n, VALUE k,
1070
- VALUE alpha,
1071
- VALUE a, VALUE lda,
1072
- VALUE beta,
1073
- VALUE c, VALUE ldc)
1074
- {
1075
- static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
1076
- const int n, const int k, const void* alpha, const void* a,
1077
- const int lda, const void* beta, void* c, const int ldc) = {
1078
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1079
- nm::math::cblas_syrk<float>,
1080
- nm::math::cblas_syrk<double>,
1081
- cblas_csyrk, cblas_zsyrk// call directly, same function signature!
1082
- /*nm::math::cblas_trsm<nm::Rational32>,
1083
- nm::math::cblas_trsm<nm::Rational64>,
1084
- nm::math::cblas_trsm<nm::Rational128>,
1085
- nm::math::cblas_trsm<nm::RubyObject>*/
1086
- };
1087
-
1088
- nm::dtype_t dtype = NM_DTYPE(a);
1089
-
1090
- if (!ttable[dtype]) {
1091
- rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1092
- } else {
1093
- void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
1094
- *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1095
- rubyval_to_cval(alpha, dtype, pAlpha);
1096
- rubyval_to_cval(beta, dtype, pBeta);
1097
-
1098
- ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1099
- }
1100
-
1101
- return Qtrue;
1102
- }
1103
-
1104
-
1105
- static VALUE nm_cblas_herk(VALUE self,
1106
- VALUE order,
1107
- VALUE uplo,
1108
- VALUE trans,
1109
- VALUE n, VALUE k,
1110
- VALUE alpha,
1111
- VALUE a, VALUE lda,
1112
- VALUE beta,
1113
- VALUE c, VALUE ldc)
1114
- {
1115
-
1116
- nm::dtype_t dtype = NM_DTYPE(a);
1117
-
1118
- if (dtype == nm::COMPLEX64) {
1119
- cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1120
- } else if (dtype == nm::COMPLEX128) {
1121
- cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1122
- } else
1123
- rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
1124
- return Qtrue;
1125
- }
1126
-
1127
-
1128
- /*
1129
- * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
1130
- *
1131
- * xGESVD computes the singular value decomposition (SVD) of a real
1132
- * M-by-N matrix A, optionally computing the left and/or right singular
1133
- * vectors. The SVD is written
1134
- *
1135
- * A = U * SIGMA * transpose(V)
1136
- *
1137
- * where SIGMA is an M-by-N matrix which is zero except for its
1138
- * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1139
- * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1140
- * are the singular values of A; they are real and non-negative, and
1141
- * are returned in descending order. The first min(m,n) columns of
1142
- * U and V are the left and right singular vectors of A.
1143
- *
1144
- * Note that the routine returns V**T, not V.
1145
- */
1146
- static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1147
- static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
1148
- NULL, NULL, NULL, NULL, NULL, // no integer ops
1149
- nm::math::lapack_gesvd<float,float>,
1150
- nm::math::lapack_gesvd<double,double>,
1151
- nm::math::lapack_gesvd<nm::Complex64,float>,
1152
- nm::math::lapack_gesvd<nm::Complex128,double>,
1153
- NULL, NULL, NULL, NULL // no rationals or Ruby objects
1154
- };
1155
-
1156
- nm::dtype_t dtype = NM_DTYPE(a);
1157
-
1158
-
1159
- if (!gesvd_table[dtype]) {
1160
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1161
- return Qfalse;
1162
- } else {
1163
- int M = FIX2INT(m),
1164
- N = FIX2INT(n);
1165
-
1166
- int min_mn = NM_MIN(M,N);
1167
- int max_mn = NM_MAX(M,N);
1168
-
1169
- char JOBU = lapack_svd_job_sym(jobu),
1170
- JOBVT = lapack_svd_job_sym(jobvt);
1171
-
1172
- // only need rwork for complex matrices
1173
- int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
1174
- void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
1175
- int work_size = FIX2INT(lwork);
1176
-
1177
- // ignore user argument for lwork if it's too small.
1178
- work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
1179
- void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1180
-
1181
- int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1182
- NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1183
- work, work_size, rwork);
1184
- return INT2FIX(info);
1185
- }
1186
- }
1187
-
1188
- /*
1189
- * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
1190
- *
1191
- * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
1192
- * M-by-N matrix A, optionally computing the left and/or right singular
1193
- * vectors. The SVD is written
1194
- *
1195
- * A = U * SIGMA * transpose(V)
1196
- *
1197
- * where SIGMA is an M-by-N matrix which is zero except for its
1198
- * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1199
- * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1200
- * are the singular values of A; they are real and non-negative, and
1201
- * are returned in descending order. The first min(m,n) columns of
1202
- * U and V are the left and right singular vectors of A.
1203
- *
1204
- * Note that the routine returns V**T, not V.
1205
- */
1206
- static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1207
- static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
1208
- NULL, NULL, NULL, NULL, NULL, // no integer ops
1209
- nm::math::lapack_gesdd<float,float>,
1210
- nm::math::lapack_gesdd<double,double>,
1211
- nm::math::lapack_gesdd<nm::Complex64,float>,
1212
- nm::math::lapack_gesdd<nm::Complex128,double>,
1213
- NULL, NULL, NULL, NULL // no rationals or Ruby objects
1214
- };
1215
-
1216
- nm::dtype_t dtype = NM_DTYPE(a);
1217
-
1218
- if (!gesdd_table[dtype]) {
1219
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1220
- return Qfalse;
1221
- } else {
1222
- int M = FIX2INT(m),
1223
- N = FIX2INT(n);
1224
-
1225
- int min_mn = NM_MIN(M,N);
1226
- int max_mn = NM_MAX(M,N);
1227
-
1228
- char JOBZ = lapack_svd_job_sym(jobz);
1229
-
1230
- // only need rwork for complex matrices
1231
- void* rwork = NULL;
1232
-
1233
- int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
1234
- if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
1235
- int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
1236
- rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
1237
-
1238
- if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
1239
- else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
1240
- else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
1241
- } else {
1242
- if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
1243
- else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
1244
- else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
1245
- }
1246
- void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1247
- int* iwork = NM_ALLOCA_N(int, 8*min_mn);
1248
-
1249
- int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1250
- NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1251
- work, work_size, iwork, rwork);
1252
- return INT2FIX(info);
1253
- }
1254
- }
1255
-
1256
-
1257
- /*
1258
- * Function signature conversion for calling CBLAS' geev functions as directly as possible.
1259
- *
1260
- * GEEV computes for an N-by-N real nonsymmetric matrix A, the
1261
- * eigenvalues and, optionally, the left and/or right eigenvectors.
1262
- *
1263
- * The right eigenvector v(j) of A satisfies
1264
- * A * v(j) = lambda(j) * v(j)
1265
- * where lambda(j) is its eigenvalue.
1266
- *
1267
- * The left eigenvector u(j) of A satisfies
1268
- * u(j)**H * A = lambda(j) * u(j)**H
1269
- * where u(j)**H denotes the conjugate transpose of u(j).
1270
- *
1271
- * The computed eigenvectors are normalized to have Euclidean norm
1272
- * equal to 1 and largest component real.
1273
- */
1274
- static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
1275
- static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
1276
- NULL, NULL, NULL, NULL, NULL, // no integer ops
1277
- nm::math::lapack_geev<float,float>,
1278
- nm::math::lapack_geev<double,double>,
1279
- nm::math::lapack_geev<nm::Complex64,float>,
1280
- nm::math::lapack_geev<nm::Complex128,double>,
1281
- NULL, NULL, NULL, NULL // no rationals or Ruby objects
1282
- };
1283
-
1284
- nm::dtype_t dtype = NM_DTYPE(a);
1285
-
1286
-
1287
- if (!geev_table[dtype]) {
1288
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1289
- return Qfalse;
1290
- } else {
1291
- int N = FIX2INT(n);
1292
-
1293
- char JOBVL = lapack_evd_job_sym(compute_left),
1294
- JOBVR = lapack_evd_job_sym(compute_right);
1295
-
1296
- void* A = NM_STORAGE_DENSE(a)->elements;
1297
- void* WR = NM_STORAGE_DENSE(w)->elements;
1298
- void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
1299
- void* VL = NM_STORAGE_DENSE(vl)->elements;
1300
- void* VR = NM_STORAGE_DENSE(vr)->elements;
1301
-
1302
- // only need rwork for complex matrices (wi == Qnil for complex)
1303
- int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
1304
- void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
1305
- int work_size = FIX2INT(lwork);
1306
- void* work;
1307
-
1308
- int info;
1309
-
1310
- // if work size is 0 or -1, query.
1311
- if (work_size <= 0) {
1312
- work_size = -1;
1313
- work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
1314
- info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1315
- work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
1316
- // line above is basically: work_size = (int)(work[0]); // now have new work_size
1317
- NM_FREE(work);
1318
- if (info == 0)
1319
- rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
1320
- else return INT2FIX(info); // error of some kind on query!
1321
- }
1322
-
1323
- // if work size is < 2*N, just set it to 2*N
1324
- if (work_size < 2*N) work_size = 2*N;
1325
- if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
1326
- work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
1327
- }
1328
-
1329
- // Allocate work array for actual run
1330
- work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
1331
-
1332
- // Perform the actual calculation.
1333
- info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1334
-
1335
- return INT2FIX(info);
1336
- }
1337
- }
1338
-
1339
-
1340
- static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1341
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1342
- /*nm::math::clapack_lauum<uint8_t, false>,
1343
- nm::math::clapack_lauum<int8_t, false>,
1344
- nm::math::clapack_lauum<int16_t, false>,
1345
- nm::math::clapack_lauum<uint32_t, false>,
1346
- nm::math::clapack_lauum<uint64_t, false>,*/
1347
- NULL, NULL, NULL, NULL, NULL,
1348
- nm::math::clapack_lauum<false, float>,
1349
- nm::math::clapack_lauum<false, double>,
1350
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1351
- clapack_clauum, clapack_zlauum, // call directly, same function signature!
1352
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1353
- nm::math::clapack_lauum<true, nm::Complex64>,
1354
- nm::math::clapack_lauum<true, nm::Complex128>,
1355
- #endif
1356
- /*
1357
- nm::math::clapack_lauum<nm::Rational32, false>,
1358
- nm::math::clapack_lauum<nm::Rational64, false>,
1359
- nm::math::clapack_lauum<nm::Rational128, false>,
1360
- nm::math::clapack_lauum<nm::RubyObject, false>
1361
-
1362
- */
1363
- };
1364
-
1365
- if (!ttable[NM_DTYPE(a)]) {
1366
- rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
1367
- } else {
1368
- // Call either our version of lauum or the LAPACK version.
1369
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1370
- }
1371
-
1372
- return a;
1373
- }
1374
-
1375
-
1376
928
  /* Call any of the clapack_xgetrf functions as directly as possible.
1377
929
  *
1378
930
  * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
@@ -1402,15 +954,8 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
1402
954
  NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1403
955
  nm::math::clapack_getrf<float>,
1404
956
  nm::math::clapack_getrf<double>,
1405
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1406
- clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
1407
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1408
957
  nm::math::clapack_getrf<nm::Complex64>,
1409
958
  nm::math::clapack_getrf<nm::Complex128>,
1410
- #endif
1411
- nm::math::clapack_getrf<nm::Rational32>,
1412
- nm::math::clapack_getrf<nm::Rational64>,
1413
- nm::math::clapack_getrf<nm::Rational128>,
1414
959
  nm::math::clapack_getrf<nm::RubyObject>
1415
960
  };
1416
961
 
@@ -1438,51 +983,6 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
1438
983
  }
1439
984
 
1440
985
 
1441
- /* Call any of the clapack_xpotrf functions as directly as possible.
1442
- *
1443
- * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
1444
- * with its arguments?
1445
- *
1446
- * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1447
- * handling, so you can easily crash Ruby!
1448
- *
1449
- * Returns an array giving the pivot indices (normally these are argument #5).
1450
- */
1451
- static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1452
- #if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
1453
- rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
1454
- #endif
1455
-
1456
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1457
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1458
- nm::math::clapack_potrf<float>,
1459
- nm::math::clapack_potrf<double>,
1460
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1461
- clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
1462
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1463
- nm::math::clapack_potrf<nm::Complex64>,
1464
- nm::math::clapack_potrf<nm::Complex128>,
1465
- #endif
1466
- NULL, NULL, NULL, NULL /*
1467
- nm::math::clapack_potrf<nm::Rational32>,
1468
- nm::math::clapack_potrf<nm::Rational64>,
1469
- nm::math::clapack_potrf<nm::Rational128>,
1470
- nm::math::clapack_potrf<nm::RubyObject> */
1471
- };
1472
-
1473
- if (!ttable[NM_DTYPE(a)]) {
1474
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1475
- // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1476
- //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1477
- } else {
1478
- // Call either our version of potrf or the LAPACK version.
1479
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1480
- }
1481
-
1482
- return a;
1483
- }
1484
-
1485
-
1486
986
  /*
1487
987
  * Call any of the clapack_xgetrs functions as directly as possible.
1488
988
  */
@@ -1493,15 +993,8 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
1493
993
  NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1494
994
  nm::math::clapack_getrs<float>,
1495
995
  nm::math::clapack_getrs<double>,
1496
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1497
- clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
1498
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1499
996
  nm::math::clapack_getrs<nm::Complex64>,
1500
997
  nm::math::clapack_getrs<nm::Complex128>,
1501
- #endif
1502
- nm::math::clapack_getrs<nm::Rational32>,
1503
- nm::math::clapack_getrs<nm::Rational64>,
1504
- nm::math::clapack_getrs<nm::Rational128>,
1505
998
  nm::math::clapack_getrs<nm::RubyObject>
1506
999
  };
1507
1000
 
@@ -1529,157 +1022,14 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
1529
1022
  return b;
1530
1023
  }
1531
1024
 
1532
-
1533
- /*
1534
- * Call any of the clapack_xpotrs functions as directly as possible.
1535
- */
1536
- static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
1537
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1538
- const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
1539
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1540
- nm::math::clapack_potrs<float,false>,
1541
- nm::math::clapack_potrs<double,false>,
1542
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1543
- clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
1544
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1545
- nm::math::clapack_potrs<nm::Complex64,true>,
1546
- nm::math::clapack_potrs<nm::Complex128,true>,
1547
- #endif
1548
- nm::math::clapack_potrs<nm::Rational32,false>,
1549
- nm::math::clapack_potrs<nm::Rational64,false>,
1550
- nm::math::clapack_potrs<nm::Rational128,false>,
1551
- nm::math::clapack_potrs<nm::RubyObject,false>
1552
- };
1553
-
1554
-
1555
- if (!ttable[NM_DTYPE(a)]) {
1556
- rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1557
- } else {
1558
-
1559
- // Call either our version of potrs or the LAPACK version.
1560
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1561
- NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1562
- }
1563
-
1564
- // b is both returned and modified directly in the argument list.
1565
- return b;
1566
- }
1567
-
1568
-
1569
1025
  /*
1570
1026
  * Simple way to check from within Ruby code if clapack functions are available, without
1571
1027
  * having to wait around for an exception to be thrown.
1572
1028
  */
1573
1029
  static VALUE nm_has_clapack(VALUE self) {
1574
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1575
1030
  return Qfalse;
1576
- #else
1577
- return Qtrue;
1578
- #endif
1579
1031
  }
1580
1032
 
1581
-
1582
- /* Call any of the clapack_xgetri functions as directly as possible.
1583
- *
1584
- * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
1585
- * with its arguments?
1586
- *
1587
- * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1588
- * handling, so you can easily crash Ruby!
1589
- *
1590
- * Returns an array giving the pivot indices (normally these are argument #5).
1591
- */
1592
- static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
1593
- #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1594
- rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1595
- #endif
1596
-
1597
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
1598
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1599
- nm::math::clapack_getri<float>,
1600
- nm::math::clapack_getri<double>,
1601
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1602
- clapack_cgetri, clapack_zgetri, // call directly, same function signature!
1603
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1604
- nm::math::clapack_getri<nm::Complex64>,
1605
- nm::math::clapack_getri<nm::Complex128>,
1606
- #endif
1607
- NULL, NULL, NULL, NULL /*
1608
- nm::math::clapack_getri<nm::Rational32>,
1609
- nm::math::clapack_getri<nm::Rational64>,
1610
- nm::math::clapack_getri<nm::Rational128>,
1611
- nm::math::clapack_getri<nm::RubyObject> */
1612
- };
1613
-
1614
- // Allocate the C version of the pivot index array
1615
- int* ipiv_;
1616
- if (TYPE(ipiv) != T_ARRAY) {
1617
- rb_raise(rb_eArgError, "ipiv must be of type Array");
1618
- } else {
1619
- ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1620
- for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1621
- ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1622
- }
1623
- }
1624
-
1625
- if (!ttable[NM_DTYPE(a)]) {
1626
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1627
- // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
1628
- //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1629
- } else {
1630
- // Call either our version of getri or the LAPACK version.
1631
- ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
1632
- }
1633
-
1634
- return a;
1635
- }
1636
-
1637
-
1638
- /* Call any of the clapack_xpotri functions as directly as possible.
1639
- *
1640
- * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
1641
- * with its arguments?
1642
- *
1643
- * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1644
- * handling, so you can easily crash Ruby!
1645
- *
1646
- * Returns an array giving the pivot indices (normally these are argument #5).
1647
- */
1648
- static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1649
- #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1650
- rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1651
- #endif
1652
-
1653
- static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1654
- NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1655
- nm::math::clapack_potri<float>,
1656
- nm::math::clapack_potri<double>,
1657
- #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1658
- clapack_cpotri, clapack_zpotri, // call directly, same function signature!
1659
- #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1660
- nm::math::clapack_potri<nm::Complex64>,
1661
- nm::math::clapack_potri<nm::Complex128>,
1662
- #endif
1663
- NULL, NULL, NULL, NULL /*
1664
- nm::math::clapack_getri<nm::Rational32>,
1665
- nm::math::clapack_getri<nm::Rational64>,
1666
- nm::math::clapack_getri<nm::Rational128>,
1667
- nm::math::clapack_getri<nm::RubyObject> */
1668
- };
1669
-
1670
- if (!ttable[NM_DTYPE(a)]) {
1671
- rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1672
- // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1673
- //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1674
- } else {
1675
- // Call either our version of getri or the LAPACK version.
1676
- ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1677
- }
1678
-
1679
- return a;
1680
- }
1681
-
1682
-
1683
1033
  /*
1684
1034
  * Call any of the clapack_xlaswp functions as directly as possible.
1685
1035
  *
@@ -1695,15 +1045,8 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
1695
1045
  nm::math::clapack_laswp<int64_t>,
1696
1046
  nm::math::clapack_laswp<float>,
1697
1047
  nm::math::clapack_laswp<double>,
1698
- //#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
1699
- // clapack_claswp, clapack_zlaswp, // call directly, same function signature!
1700
- //#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1701
1048
  nm::math::clapack_laswp<nm::Complex64>,
1702
1049
  nm::math::clapack_laswp<nm::Complex128>,
1703
- //#endif
1704
- nm::math::clapack_laswp<nm::Rational32>,
1705
- nm::math::clapack_laswp<nm::Rational64>,
1706
- nm::math::clapack_laswp<nm::Rational128>,
1707
1050
  nm::math::clapack_laswp<nm::RubyObject>
1708
1051
  };
1709
1052
 
@@ -1735,6 +1078,20 @@ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dty
1735
1078
  ttable[dtype](M, elements, lda, result);
1736
1079
  }
1737
1080
 
1081
+ /*
1082
+ * C accessor for reducing a matrix to hessenberg form.
1083
+ */
1084
+ void nm_math_hessenberg(VALUE a) {
1085
+ static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
1086
+ NULL, NULL, NULL, NULL, NULL, // does not support ints
1087
+ nm::math::hessenberg<float>,
1088
+ nm::math::hessenberg<double>,
1089
+ NULL, NULL, // does not support Complex
1090
+ NULL // no support for Ruby Object
1091
+ };
1092
+
1093
+ ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
1094
+ }
1738
1095
  /*
1739
1096
  * C accessor for calculating an in-place inverse.
1740
1097
  */