nmatrix-gemv 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +29 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/Gemfile +7 -0
  6. data/README.md +29 -0
  7. data/Rakefile +225 -0
  8. data/ext/nmatrix_gemv/binary_format.txt +53 -0
  9. data/ext/nmatrix_gemv/data/complex.h +399 -0
  10. data/ext/nmatrix_gemv/data/data.cpp +298 -0
  11. data/ext/nmatrix_gemv/data/data.h +771 -0
  12. data/ext/nmatrix_gemv/data/meta.h +70 -0
  13. data/ext/nmatrix_gemv/data/rational.h +436 -0
  14. data/ext/nmatrix_gemv/data/ruby_object.h +471 -0
  15. data/ext/nmatrix_gemv/extconf.rb +254 -0
  16. data/ext/nmatrix_gemv/math.cpp +1639 -0
  17. data/ext/nmatrix_gemv/math/asum.h +143 -0
  18. data/ext/nmatrix_gemv/math/geev.h +82 -0
  19. data/ext/nmatrix_gemv/math/gemm.h +271 -0
  20. data/ext/nmatrix_gemv/math/gemv.h +212 -0
  21. data/ext/nmatrix_gemv/math/ger.h +96 -0
  22. data/ext/nmatrix_gemv/math/gesdd.h +80 -0
  23. data/ext/nmatrix_gemv/math/gesvd.h +78 -0
  24. data/ext/nmatrix_gemv/math/getf2.h +86 -0
  25. data/ext/nmatrix_gemv/math/getrf.h +240 -0
  26. data/ext/nmatrix_gemv/math/getri.h +108 -0
  27. data/ext/nmatrix_gemv/math/getrs.h +129 -0
  28. data/ext/nmatrix_gemv/math/idamax.h +86 -0
  29. data/ext/nmatrix_gemv/math/inc.h +47 -0
  30. data/ext/nmatrix_gemv/math/laswp.h +165 -0
  31. data/ext/nmatrix_gemv/math/long_dtype.h +52 -0
  32. data/ext/nmatrix_gemv/math/math.h +1069 -0
  33. data/ext/nmatrix_gemv/math/nrm2.h +181 -0
  34. data/ext/nmatrix_gemv/math/potrs.h +129 -0
  35. data/ext/nmatrix_gemv/math/rot.h +141 -0
  36. data/ext/nmatrix_gemv/math/rotg.h +115 -0
  37. data/ext/nmatrix_gemv/math/scal.h +73 -0
  38. data/ext/nmatrix_gemv/math/swap.h +73 -0
  39. data/ext/nmatrix_gemv/math/trsm.h +387 -0
  40. data/ext/nmatrix_gemv/nm_memory.h +60 -0
  41. data/ext/nmatrix_gemv/nmatrix_gemv.cpp +90 -0
  42. data/ext/nmatrix_gemv/nmatrix_gemv.h +374 -0
  43. data/ext/nmatrix_gemv/ruby_constants.cpp +153 -0
  44. data/ext/nmatrix_gemv/ruby_constants.h +107 -0
  45. data/ext/nmatrix_gemv/ruby_nmatrix.c +84 -0
  46. data/ext/nmatrix_gemv/ttable_helper.rb +122 -0
  47. data/ext/nmatrix_gemv/types.h +54 -0
  48. data/ext/nmatrix_gemv/util/util.h +78 -0
  49. data/lib/nmatrix-gemv.rb +43 -0
  50. data/lib/nmatrix_gemv/blas.rb +85 -0
  51. data/lib/nmatrix_gemv/nmatrix_gemv.rb +35 -0
  52. data/lib/nmatrix_gemv/rspec.rb +75 -0
  53. data/nmatrix-gemv.gemspec +31 -0
  54. data/spec/blas_spec.rb +154 -0
  55. data/spec/spec_helper.rb +128 -0
  56. metadata +186 -0
@@ -0,0 +1,254 @@
1
+ # = NMatrix
2
+ #
3
+ # A linear algebra library for scientific computation in Ruby.
4
+ # NMatrix is part of SciRuby.
5
+ #
6
+ # NMatrix was originally inspired by and derived from NArray, by
7
+ # Masahiro Tanaka: http://narray.rubyforge.org
8
+ #
9
+ # == Copyright Information
10
+ #
11
+ # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
12
+ # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
13
+ #
14
+ # Please see LICENSE.txt for additional copyright notices.
15
+ #
16
+ # == Contributing
17
+ #
18
+ # By contributing source code to SciRuby, you agree to be bound by
19
+ # our Contributor Agreement:
20
+ #
21
+ # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
22
+ #
23
+ # == extconf.rb
24
+ #
25
+ # This file checks for ATLAS and other necessary headers, and
26
+ # generates a Makefile for compiling NMatrix.
27
+
28
+ require "mkmf"
29
+
30
+
31
+ # Function derived from NArray's extconf.rb.
32
+ def have_type(type, header=nil) #:nodoc:
33
+ printf "checking for %s... ", type
34
+ STDOUT.flush
35
+
36
+ src = <<"SRC"
37
+ #include <ruby.h>
38
+ SRC
39
+
40
+
41
+ src << <<"SRC" unless header.nil?
42
+ #include <#{header}>
43
+ SRC
44
+
45
+ r = try_link(src + <<"SRC")
46
+ int main() { return 0; }
47
+ int t() { #{type} a; return 0; }
48
+ SRC
49
+
50
+ unless r
51
+ print "no\n"
52
+ return false
53
+ end
54
+
55
+ $defs.push(format("-DHAVE_%s", type.upcase))
56
+
57
+ print "yes\n"
58
+
59
+ return true
60
+ end
61
+
62
+ # Function derived from NArray's extconf.rb.
63
+ def create_conf_h(file) #:nodoc:
64
+ print "creating #{file}\n"
65
+ File.open(file, 'w') do |hfile|
66
+ header_guard = file.upcase.sub(/\s|\./, '_')
67
+
68
+ hfile.puts "#ifndef #{header_guard}"
69
+ hfile.puts "#define #{header_guard}"
70
+ hfile.puts
71
+
72
+ # FIXME: Find a better way to do this:
73
+ hfile.puts "#define RUBY_2 1" if RUBY_VERSION >= '2.0'
74
+ hfile.puts "#define OLD_RB_SCAN_ARGS" if RUBY_VERSION < '1.9.3'
75
+
76
+ for line in $defs
77
+ line =~ /^-D(.*)/
78
+ hfile.printf "#define %s 1\n", $1
79
+ end
80
+
81
+ hfile.puts
82
+ hfile.puts "#endif"
83
+ end
84
+ end
85
+
86
+ if RUBY_VERSION < '1.9'
87
+ raise(NotImplementedError, "Sorry, you need at least Ruby 1.9!")
88
+ else
89
+ $INSTALLFILES = [['nmatrix_gemv.h', '$(archdir)'], ['nmatrix_gemv.hpp', '$(archdir)'], ['nmatrix_gemv_config.h', '$(archdir)']]
90
+ if /cygwin|mingw/ =~ RUBY_PLATFORM
91
+ $INSTALLFILES << ['libnmatrix_gemv.a', '$(archdir)']
92
+ end
93
+ end
94
+
95
+ if /cygwin|mingw/ =~ RUBY_PLATFORM
96
+ CONFIG["DLDFLAGS"] << " --output-lib libnmatrix_gemv.a"
97
+ end
98
+
99
+ $DEBUG = true
100
+ $CFLAGS = ["-Wall -Werror=return-type",$CFLAGS].join(" ")
101
+ $CXXFLAGS = ["-Wall -Werror=return-type",$CXXFLAGS].join(" ")
102
+ $CPPFLAGS = ["-Wall -Werror=return-type",$CPPFLAGS].join(" ")
103
+
104
+ # When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb.
105
+ basenames = %w{nmatrix_gemv ruby_constants data/data math }
106
+ $objs = basenames.map { |b| "#{b}.o" }
107
+ $srcs = basenames.map { |b| "#{b}.cpp" }
108
+
109
+ #CONFIG['CXX'] = 'clang++'
110
+ CONFIG['CXX'] = 'g++'
111
+
112
+ def find_newer_gplusplus #:nodoc:
113
+ print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
114
+ [9,8,7,6,5,4,3].each do |minor|
115
+ ver = "4.#{minor}"
116
+ gpp = "g++-#{ver}"
117
+ result = `which #{gpp}`
118
+ next if result.empty?
119
+ CONFIG['CXX'] = gpp
120
+ puts ver
121
+ return CONFIG['CXX']
122
+ end
123
+ false
124
+ end
125
+
126
+ def gplusplus_version #:nodoc:
127
+ cxxvar = proc { |n| `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{n}`.chomp.split(' ')[2] }
128
+ major = cxxvar.call('__GNUC__')
129
+ minor = cxxvar.call('__GNUC_MINOR__')
130
+ patch = cxxvar.call('__GNUC_PATCHLEVEL__')
131
+
132
+ raise("unable to determine g++ version (match to get version was nil)") if major.nil? || minor.nil? || patch.nil?
133
+
134
+ "#{major}.#{minor}.#{patch}"
135
+ end
136
+
137
+
138
+ if CONFIG['CXX'] == 'clang++'
139
+ $CPP_STANDARD = 'c++11'
140
+
141
+ else
142
+ version = gplusplus_version
143
+ if version < '4.3.0' && CONFIG['CXX'] == 'g++' # see if we can find a newer G++, unless it's been overridden by user
144
+ if !find_newer_gplusplus
145
+ raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
146
+ end
147
+ version = gplusplus_version
148
+ end
149
+
150
+ if version < '4.7.0'
151
+ $CPP_STANDARD = 'c++0x'
152
+ else
153
+ $CPP_STANDARD = 'c++11'
154
+ end
155
+ puts "using C++ standard... #{$CPP_STANDARD}"
156
+ puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
157
+ end
158
+
159
+ # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
160
+
161
+ # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
162
+ # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
163
+ # for ATLAS. The same for all the others
164
+ #
165
+ #dir_config("clapack", ["/usr/local/atlas/include"], [])
166
+ #
167
+ #
168
+
169
+ # Is g++ having trouble finding your header files?
170
+ # Try this:
171
+ # export C_INCLUDE_PATH=/usr/local/atlas/include
172
+ # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
173
+ # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
174
+
175
+ idefaults = {lapack: ["/usr/include/atlas"],
176
+ cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
177
+ atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
178
+
179
+ # For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
180
+ # exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
181
+ # that Dir.exists?(d) for each.
182
+ ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
183
+ cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
184
+ atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
185
+
186
+ if have_library("clapack") # Usually only applies for Mac OS X
187
+ $libs += " -lclapack "
188
+ end
189
+
190
+ unless have_library("lapack")
191
+ dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
192
+ end
193
+
194
+ unless have_library("cblas")
195
+ dir_config("cblas", idefaults[:cblas], ldefaults[:cblas])
196
+ end
197
+
198
+ unless have_library("atlas")
199
+ dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
200
+ end
201
+
202
+ # If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
203
+ # we try our luck with the default location.
204
+ if have_header("atlas/cblas.h")
205
+ have_header("atlas/clapack.h")
206
+ else
207
+ have_header("cblas.h")
208
+ have_header("clapack.h")
209
+ end
210
+
211
+
212
+ have_func("clapack_dgetrf", ["cblas.h", "clapack.h"])
213
+ have_func("clapack_dgetri", ["cblas.h", "clapack.h"])
214
+ have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
215
+
216
+ have_func("cblas_dgemm", "cblas.h")
217
+
218
+ #have_func("rb_scan_args", "ruby.h")
219
+
220
+ #find_library("lapack", "clapack_dgetrf")
221
+ #find_library("cblas", "cblas_dgemm")
222
+ #find_library("atlas", "ATL_dgemmNN")
223
+ # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
224
+ $libs += " -llapack -lcblas -latlas "
225
+ #$libs += " -lprofiler "
226
+
227
+
228
+ # For release, these next two should both be changed to -O3.
229
+ $CFLAGS += " -O3 -g" #" -O0 -g "
230
+ #$CFLAGS += " -static -O0 -g "
231
+ $CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} -g" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
232
+ #$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
233
+
234
+ CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
235
+ CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '')
236
+ CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
237
+
238
+ create_conf_h("nmatrix_gemv_config.h")
239
+ create_makefile("nmatrix_gemv")
240
+
241
+ Dir.mkdir("data") unless Dir.exists?("data")
242
+ #Dir.mkdir("util") unless Dir.exists?("util")
243
+ #Dir.mkdir("storage") unless Dir.exists?("storage")
244
+ #Dir.chdir("storage") do
245
+ # Dir.mkdir("yale") unless Dir.exists?("yale")
246
+ # Dir.mkdir("list") unless Dir.exists?("list")
247
+ # Dir.mkdir("dense") unless Dir.exists?("dense")
248
+ #end
249
+
250
+ # to clean up object files in subdirectories:
251
+ open('Makefile', 'a') do |f|
252
+ clean_objs_paths = %w{data}.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" }
253
+ f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}")
254
+ end
@@ -0,0 +1,1639 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math.cpp
25
+ //
26
+ // Ruby-exposed BLAS functions.
27
+ //
28
+ // === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
29
+ //
30
+ // This procedure is written as if for a fictional function with double
31
+ // version dbacon, which we'll say is from LAPACK.
32
+ //
33
+ // 1. Write a default templated version which probably returns a boolean.
34
+ // Call it bacon, and put it in math.h.
35
+ //
36
+ // Order will always be row-major, so we don't need to pass that.
37
+ // CBLAS_TRANSPOSE-type arguments, however, should be passed.
38
+ //
39
+ // Otherwise, arguments should look like those in cblas.h or clapack.h:
40
+ //
41
+ // template <typename DType>
42
+ // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
43
+ // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
44
+ // }
45
+ //
46
+ // 2. In math.cpp, add a templated inline static version of the function which takes
47
+ // only void* pointers and uses reinterpret_cast to convert them to the
48
+ // proper dtype.
49
+ //
50
+ // This function may also need to switch m and n if these arguments are given.
51
+ //
52
+ // For an example, see cblas_gemm. This function should do nothing other than cast
53
+ // appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
54
+ // all take void* only, and no other pointers that vary between functions, you can skip
55
+ // this particular step -- as we can call them directly using a custom function pointer
56
+ // array (same function signature!).
57
+ //
58
+ // This version of the function will be the one exposed through NMatrix::LAPACK. We
59
+ // want it to be as close to the actual LAPACK version of the function as possible,
60
+ // and with as few checks as possible.
61
+ //
62
+ // You will probably need a forward declaration in the extern "C" block.
63
+ //
64
+ // Note: In that case, the function you wrote in Step 1 should also take exactly the
65
+ // same arguments as clapack_xbacon. Otherwise Bad Things will happen.
66
+ //
67
+ // 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
68
+ //
69
+ // You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
70
+ // Or you may prefer to do it by hand:
71
+ //
72
+ // template <>
73
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
74
+ // clapack_sbacon(trans, M, N, A, ...);
75
+ // return true;
76
+ // }
77
+ //
78
+ // Make sure these functions are in the namespace nm::math.
79
+ //
80
+ // Note that you should do everything in your power here to parse any return values
81
+ // clapack_sbacon may give you. We're not trying very hard in this example, but you might
82
+ // look at getrf to see how it might be done.
83
+ //
84
+ // 4. Expose the function in nm_math_init_blas(), in math.cpp:
85
+ //
86
+ // rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
87
+ //
88
+ // Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
89
+ //
90
+ // 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
91
+ // sanity checks we left out in step 2.
92
+ //
93
+ // 6. Write tests for NMatrix::LAPACK::getrf, confirming that it works for the ATLAS dtypes.
94
+ //
95
+ // 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
96
+ // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
97
+ // inserts a lot of unnecessary stuff.
98
+ //
99
+ // Copy and paste the output into the default templated function you wrote in Step 1.
100
+ // Fix it so it works as a template instead of just for doubles.
101
+ //
102
+ // 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
103
+ //
104
+ // 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
105
+ // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
106
+ //
107
+ // 10. Pull request!
108
+
109
+
110
+
111
+ /*
112
+ * Project Includes
113
+ */
114
+
115
+
116
+ #include <algorithm>
117
+ #include <limits>
118
+
119
+
120
+ //#include "math/inc.h"
121
+ #include "data/data.h"
122
+ /*
123
+ #include "math/gesdd.h"
124
+ #include "math/gesvd.h"
125
+ #include "math/geev.h"
126
+ #include "math/swap.h"
127
+ #include "math/idamax.h"
128
+ #include "math/scal.h"
129
+ #include "math/ger.h"
130
+ #include "math/getf2.h"
131
+ #include "math/laswp.h"
132
+ #include "math/trsm.h"
133
+ */
134
+ #include "math/long_dtype.h" // for gemm.h
135
+ //#include "math/gemm.h"
136
+ #include "math/gemv.h"
137
+ /*
138
+ #include "math/asum.h"
139
+ #include "math/nrm2.h"
140
+ #include "math/getrf.h"
141
+ #include "math/getri.h"
142
+ #include "math/getrs.h"
143
+ #include "math/potrs.h"
144
+ #include "math/rot.h"
145
+ #include "math/rotg.h"
146
+ */
147
+ //#include "math/math.h"
148
+ //#include "storage/dense/dense.h"
149
+
150
+ #include "nmatrix_gemv.h"
151
+ #include "ruby_constants.h"
152
+
153
+ /*
154
+ * Forward Declarations
155
+ */
156
+
157
+ extern "C" {
158
+ #if defined HAVE_CLAPACK_H
159
+ #include <clapack.h>
160
+ #elif defined HAVE_ATLAS_CLAPACK_H
161
+ #include <atlas/clapack.h>
162
+ #endif
163
+
164
+ /*
165
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
166
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
167
+ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
168
+ static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
169
+
170
+ static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
171
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
172
+ */
173
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
174
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
175
+ /*
176
+ static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
177
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
178
+ static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
179
+ VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
180
+ static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
181
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
182
+ static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
183
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
184
+
185
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
186
+ static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
187
+ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
188
+ static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
189
+ static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
190
+ static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
191
+ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
192
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
193
+ static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
194
+
195
+ static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
196
+ static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
197
+ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
198
+ */
199
+ } // end of extern "C" block
200
+
201
+ ////////////////////
202
+ // Math Functions //
203
+ ////////////////////
204
+
205
+ namespace nm { namespace math {
206
+
207
+ /*
208
+ * Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
209
+ */
210
+
211
+ /*
212
+ template <typename DType>
213
+ void det_exact(const int M, const void* A_elements, const int lda, void* result_arg) {
214
+ DType* result = reinterpret_cast<DType*>(result_arg);
215
+ const DType* A = reinterpret_cast<const DType*>(A_elements);
216
+
217
+ typename LongDType<DType>::type x, y;
218
+
219
+ if (M == 2) {
220
+ *result = A[0] * A[lda+1] - A[1] * A[lda];
221
+
222
+ } else if (M == 3) {
223
+ x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
224
+ y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
225
+ x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di)
226
+
227
+ y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
228
+ *result = A[2]*y + x; // c*(dh-eg) + _
229
+ } else if (M < 2) {
230
+ rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
231
+ } else {
232
+ rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
233
+ }
234
+ }*/
235
+
236
+
237
+ /*
238
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
239
+ */
240
+ /*
241
+ template <typename DType, typename CType>
242
+ inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
243
+ return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
244
+ }
245
+ */
246
+
247
+ /*
248
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
249
+ */
250
+ /*
251
+ template <typename DType, typename CType>
252
+ inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
253
+ return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
254
+ }
255
+ */
256
+
257
+ /*
258
+ * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
259
+ *
260
+ * For documentation: http://www.netlib.org/blas/dgemm.f
261
+ */
262
+ /*
263
+ template <typename DType>
264
+ inline static void cblas_gemm(const enum CBLAS_ORDER order,
265
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
266
+ int m, int n, int k,
267
+ void* alpha,
268
+ void* a, int lda,
269
+ void* b, int ldb,
270
+ void* beta,
271
+ void* c, int ldc)
272
+ {
273
+ gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
274
+ reinterpret_cast<DType*>(a), lda,
275
+ reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
276
+ reinterpret_cast<DType*>(c), ldc);
277
+ }*/
278
+
279
+
280
+ /*
281
+ * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
282
+ *
283
+ * For documentation: http://www.netlib.org/lapack/double/dgetrf.f
284
+ */
285
+ template <typename DType>
286
+ inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans,
287
+ const int m, const int n,
288
+ const void* alpha,
289
+ const void* a, const int lda,
290
+ const void* x, const int incx,
291
+ const void* beta,
292
+ void* y, const int incy)
293
+ {
294
+ return gemv<DType>(trans,
295
+ m, n, reinterpret_cast<const DType*>(alpha),
296
+ reinterpret_cast<const DType*>(a), lda,
297
+ reinterpret_cast<const DType*>(x), incx, reinterpret_cast<const DType*>(beta),
298
+ reinterpret_cast<DType*>(y), incy);
299
+ }
300
+
301
+
302
+ /*
303
+ * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
304
+ *
305
+ * For documentation: http://www.netlib.org/blas/dtrsm.f
306
+ */
307
+ /*
308
+ template <typename DType>
309
+ inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
310
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
311
+ const int m, const int n, const void* alpha, const void* a,
312
+ const int lda, void* b, const int ldb)
313
+ {
314
+ trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
315
+ reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
316
+ }*/
317
+
318
+
319
+ /*
320
+ * Function signature conversion for calling CBLAS' trmm functions as directly as possible.
321
+ *
322
+ * For documentation: http://www.netlib.org/blas/dtrmm.f
323
+ */
324
+ /*
325
+ template <typename DType>
326
+ inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
327
+ const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
328
+ const void* A, const int lda, void* B, const int ldb)
329
+ {
330
+ trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
331
+ reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
332
+ }*/
333
+
334
+
335
+ /*
336
+ * Function signature conversion for calling CBLAS' syrk functions as directly as possible.
337
+ *
338
+ * For documentation: http://www.netlib.org/blas/dsyrk.f
339
+ */
340
+ /*
341
+ template <typename DType>
342
+ inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
343
+ const int n, const int k, const void* alpha,
344
+ const void* A, const int lda, const void* beta, void* C, const int ldc)
345
+ {
346
+ syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
347
+ reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
348
+ }*/
349
+
350
+
351
+
352
+
353
+ }} // end of namespace nm::math
354
+
355
+
356
+ extern "C" {
357
+
358
+ ///////////////////
359
+ // Ruby Bindings //
360
+ ///////////////////
361
+
362
+ void nm_math_init_blas() {
363
+
364
+ //cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
365
+
366
+ /* ATLAS-CLAPACK Functions */
367
+ /*
368
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
369
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
370
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
371
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
372
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
373
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
374
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
375
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
376
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
377
+ */
378
+
379
+ /* Non-ATLAS regular LAPACK Functions called via Fortran interface */
380
+ /*
381
+ rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
382
+ rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
383
+ rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
384
+ */
385
+
386
+ cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
387
+
388
+ /*
389
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
390
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
391
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
392
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
393
+
394
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
395
+ */
396
+ //rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
397
+
398
+ rb_define_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
399
+ /*
400
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
401
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
402
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
403
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
404
+ */
405
+ }
406
+
407
+ /*
408
+ * Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
409
+ *
410
+ * Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
411
+ */
412
+ /*
413
+ static inline char lapack_svd_job_sym(VALUE op) {
414
+ if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
415
+ else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
416
+ else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
417
+ else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
418
+ else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
419
+ return 'a';
420
+ }
421
+ */
422
+
423
+
424
+ /*
425
+ * Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
426
+ *
427
+ * Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
428
+ */
429
+ /*
430
+ static inline char lapack_evd_job_sym(VALUE op) {
431
+ if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
432
+ else return 'V';
433
+ }
434
+ */
435
+
436
+
437
+ /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
438
+ * into an enum recognized by cblas.
439
+ *
440
+ * Called by nm_cblas_gemm -- basically inline.
441
+ *
442
+ */
443
+ static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
444
+ if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
445
+ else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
446
+ else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
447
+ else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
448
+ return CblasNoTrans;
449
+ }
450
+
451
+
452
+ /*
453
+ * Interprets cblas argument which could be :left or :right
454
+ *
455
+ * Called by nm_cblas_trsm -- basically inline
456
+ */
457
+ /*
458
+ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
459
+ ID op_id = rb_to_id(op);
460
+ if (op_id == nm_rb_left) return CblasLeft;
461
+ if (op_id == nm_rb_right) return CblasRight;
462
+ rb_raise(rb_eArgError, "Expected :left or :right for side argument");
463
+ return CblasLeft;
464
+ }*/
465
+
466
+ /*
467
+ * Interprets cblas argument which could be :upper or :lower
468
+ *
469
+ * Called by nm_cblas_trsm -- basically inline
470
+ */
471
+ /*
472
+ static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
473
+ ID op_id = rb_to_id(op);
474
+ if (op_id == nm_rb_upper) return CblasUpper;
475
+ if (op_id == nm_rb_lower) return CblasLower;
476
+ rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
477
+ return CblasUpper;
478
+ }*/
479
+
480
+
481
+ /*
482
+ * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
483
+ *
484
+ * Called by nm_cblas_trsm -- basically inline
485
+ */
486
+ /*
487
+ static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
488
+ if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
489
+ return CblasNonUnit;
490
+ }*/
491
+
492
+ /*
493
+ * Interprets cblas argument which could be :row or :col
494
+ */
495
+ /*
496
+ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
497
+ if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
498
+ else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
499
+ rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
500
+ rb_raise(rb_eArgError, "Expected :row or :col for order argument");
501
+ return CblasRowMajor;
502
+ }*/
503
+
504
+
505
+ /*
506
+ * Call any of the cblas_xrotg functions as directly as possible.
507
+ *
508
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
509
+ *
510
+ * | c s | | a | | r |
511
+ * | -s c | * | b | = | 0 |
512
+ *
513
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
514
+ *
515
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
516
+ *
517
+ * This function differs from most of the other raw BLAS accessors. Instead of providing a, b, c, s as arguments, you
518
+ * should only provide a and b (the inputs), and you should provide them as a single NVector (or the first two elements
519
+ * of any dense NMatrix or NVector type, specifically).
520
+ *
521
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input NVector will also be modified in-place.
522
+ *
523
+ * If you provide rationals, be aware that there's a high probability of an error, since rotg includes a square root --
524
+ * and most rationals' square roots are irrational. You're better off converting to Float first.
525
+ *
526
+ * This function, like the other cblas_ functions, does minimal type-checking.
527
+ */
528
+ /*
529
+ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
530
+ static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
531
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
532
+ nm::math::cblas_rotg<float>,
533
+ nm::math::cblas_rotg<double>,
534
+ nm::math::cblas_rotg<nm::Complex64>,
535
+ nm::math::cblas_rotg<nm::Complex128>,
536
+ NULL, NULL, NULL, // no rationals
537
+ NULL //nm::math::cblas_rotg<nm::RubyObject>
538
+ };
539
+
540
+ nm::dtype_t dtype = NM_DTYPE(ab);
541
+
542
+ if (!ttable[dtype]) {
543
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
544
+ return Qnil;
545
+
546
+ } else {
547
+ NM_CONSERVATIVE(nm_register_value(self));
548
+ NM_CONSERVATIVE(nm_register_value(ab));
549
+ void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
550
+ *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
551
+
552
+ // extract A and B from the NVector (first two elements)
553
+ void* pA = NM_STORAGE_DENSE(ab)->elements;
554
+ void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
555
+ // c and s are output
556
+
557
+ ttable[dtype](pA, pB, pC, pS);
558
+
559
+ VALUE result = rb_ary_new2(2);
560
+
561
+ if (dtype == nm::RUBYOBJ) {
562
+ rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
563
+ rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
564
+ } else {
565
+ rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
566
+ rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
567
+ }
568
+ NM_CONSERVATIVE(nm_unregister_value(ab));
569
+ NM_CONSERVATIVE(nm_unregister_value(self));
570
+ return result;
571
+ }
572
+ }*/
573
+
574
+
575
+ /*
576
+ * Call any of the cblas_xrot functions as directly as possible.
577
+ *
578
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
579
+ *
580
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
581
+ * * n :: number of elements to consider in x and y
582
+ * * x :: a vector (expects an NVector)
583
+ * * incx :: stride of x
584
+ * * y :: a vector (expects an NVector)
585
+ * * incy :: stride of y
586
+ * * c :: cosine of the angle of rotation
587
+ * * s :: sine of the angle of rotation
588
+ *
589
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
590
+ * will be float for Complex64 or double for Complex128.
591
+ *
592
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
593
+ * with its arguments?
594
+ *
595
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
596
+ * handling, so you can easily crash Ruby!
597
+ */
598
+ /*
599
+ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
600
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
601
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
602
+ nm::math::cblas_rot<float,float>,
603
+ nm::math::cblas_rot<double,double>,
604
+ nm::math::cblas_rot<nm::Complex64,float>,
605
+ nm::math::cblas_rot<nm::Complex128,double>,
606
+ nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
607
+ nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
608
+ nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
609
+ nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
610
+ };
611
+
612
+ nm::dtype_t dtype = NM_DTYPE(x);
613
+
614
+
615
+ if (!ttable[dtype]) {
616
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
617
+ return Qfalse;
618
+ } else {
619
+ void *pC, *pS;
620
+
621
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
622
+ if (dtype == nm::COMPLEX64) {
623
+ pC = NM_ALLOCA_N(float,1);
624
+ pS = NM_ALLOCA_N(float,1);
625
+ rubyval_to_cval(c, nm::FLOAT32, pC);
626
+ rubyval_to_cval(s, nm::FLOAT32, pS);
627
+ } else if (dtype == nm::COMPLEX128) {
628
+ pC = NM_ALLOCA_N(double,1);
629
+ pS = NM_ALLOCA_N(double,1);
630
+ rubyval_to_cval(c, nm::FLOAT64, pC);
631
+ rubyval_to_cval(s, nm::FLOAT64, pS);
632
+ } else {
633
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
634
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
635
+ rubyval_to_cval(c, dtype, pC);
636
+ rubyval_to_cval(s, dtype, pS);
637
+ }
638
+
639
+
640
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
641
+
642
+ return Qtrue;
643
+ }
644
+ }*/
645
+
646
+
647
+ /*
648
+ * Call any of the cblas_xnrm2 functions as directly as possible.
649
+ *
650
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
651
+ *
652
+ * Arguments:
653
+ * * n :: length of x, must be at least 0
654
+ * * x :: pointer to first entry of input vector
655
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
656
+ *
657
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
658
+ * with its arguments?
659
+ *
660
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
661
+ * handling, so you can easily crash Ruby!
662
+ */
663
+ /*
664
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
665
+
666
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {*/
667
+ /* nm::math::cblas_nrm2<uint8_t,uint8_t>,
668
+ nm::math::cblas_nrm2<int8_t,int8_t>,
669
+ nm::math::cblas_nrm2<int16_t,int16_t>,
670
+ nm::math::cblas_nrm2<int32_t,int32_t>, */
671
+ /*NULL, NULL, NULL, NULL, NULL, // no help for integers
672
+ nm::math::cblas_nrm2<float32_t,float32_t>,
673
+ nm::math::cblas_nrm2<float64_t,float64_t>,
674
+ nm::math::cblas_nrm2<float32_t,nm::Complex64>,
675
+ nm::math::cblas_nrm2<float64_t,nm::Complex128>,
676
+ //nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
677
+ //nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
678
+ //nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
679
+ NULL, NULL, NULL,
680
+ nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
681
+ };
682
+
683
+ nm::dtype_t dtype = NM_DTYPE(x);
684
+
685
+ if (!ttable[dtype]) {
686
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
687
+ return Qnil;
688
+
689
+ } else {
690
+ // Determine the return dtype and allocate it
691
+ nm::dtype_t rdtype = dtype;
692
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
693
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
694
+
695
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
696
+
697
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
698
+
699
+ return rubyobj_from_cval(Result, rdtype).rval;
700
+ }
701
+ }*/
702
+
703
+
704
+
705
+ /*
706
+ * Call any of the cblas_xasum functions as directly as possible.
707
+ *
708
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
709
+ * of a vector x.
710
+ *
711
+ * Arguments:
712
+ * * n :: length of x, must be at least 0
713
+ * * x :: pointer to first entry of input vector
714
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
715
+ *
716
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
717
+ * with its arguments?
718
+ *
719
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
720
+ * handling, so you can easily crash Ruby!
721
+ */
722
+ /*
723
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
724
+
725
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
726
+ nm::math::cblas_asum<uint8_t,uint8_t>,
727
+ nm::math::cblas_asum<int8_t,int8_t>,
728
+ nm::math::cblas_asum<int16_t,int16_t>,
729
+ nm::math::cblas_asum<int32_t,int32_t>,
730
+ nm::math::cblas_asum<int64_t,int64_t>,
731
+ nm::math::cblas_asum<float32_t,float32_t>,
732
+ nm::math::cblas_asum<float64_t,float64_t>,
733
+ nm::math::cblas_asum<float32_t,nm::Complex64>,
734
+ nm::math::cblas_asum<float64_t,nm::Complex128>,
735
+ nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
736
+ nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
737
+ nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
738
+ nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
739
+ };
740
+
741
+ nm::dtype_t dtype = NM_DTYPE(x);
742
+
743
+ // Determine the return dtype and allocate it
744
+ nm::dtype_t rdtype = dtype;
745
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
746
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
747
+
748
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
749
+
750
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
751
+
752
+ return rubyobj_from_cval(Result, rdtype).rval;
753
+ }*/
754
+
755
+
756
+ /* Call any of the cblas_xgemm functions as directly as possible.
757
+ *
758
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
759
+ *
760
+ * C = alpha*op(A)*op(B) + beta*C
761
+ *
762
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
763
+ *
764
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
765
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
766
+ * expose the ultra-optimized ATLAS versions.
767
+ *
768
+ * == Arguments
769
+ * See: http://www.netlib.org/blas/dgemm.f
770
+ *
771
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
772
+ * with its arguments?
773
+ *
774
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
775
+ * handling, so you can easily crash Ruby!
776
+ */
777
+ /*
778
+ static VALUE nm_cblas_gemm(VALUE self,
779
+ VALUE order,
780
+ VALUE trans_a, VALUE trans_b,
781
+ VALUE m, VALUE n, VALUE k,
782
+ VALUE alpha,
783
+ VALUE a, VALUE lda,
784
+ VALUE b, VALUE ldb,
785
+ VALUE beta,
786
+ VALUE c, VALUE ldc)
787
+ {
788
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
789
+
790
+ nm::dtype_t dtype = NM_DTYPE(a);
791
+
792
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
793
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
794
+ rubyval_to_cval(alpha, dtype, pAlpha);
795
+ rubyval_to_cval(beta, dtype, pBeta);
796
+
797
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
798
+
799
+ return c;
800
+ }*/
801
+
802
+
803
+ /* Call any of the cblas_xgemv functions as directly as possible.
804
+ *
805
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
806
+ *
807
+ * y = alpha*op(A)*x + beta*y
808
+ *
809
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
810
+ *
811
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
812
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
813
+ * expose the ultra-optimized ATLAS versions.
814
+ *
815
+ * == Arguments
816
+ * See: http://www.netlib.org/blas/dgemm.f
817
+ *
818
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
819
+ * with its arguments?
820
+ *
821
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
822
+ * handling, so you can easily crash Ruby!
823
+ */
824
+ static VALUE nm_cblas_gemv(VALUE self,
825
+ VALUE trans_a,
826
+ VALUE m, VALUE n,
827
+ VALUE alpha,
828
+ VALUE a, VALUE lda,
829
+ VALUE x, VALUE incx,
830
+ VALUE beta,
831
+ VALUE y, VALUE incy)
832
+ {
833
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
834
+
835
+ nm::dtype_t dtype = NM_DTYPE(a);
836
+
837
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
838
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
839
+ rubyval_to_cval(alpha, dtype, pAlpha);
840
+ rubyval_to_cval(beta, dtype, pBeta);
841
+
842
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
843
+ }
844
+
845
+
846
+ /*
847
+ static VALUE nm_cblas_trsm(VALUE self,
848
+ VALUE order,
849
+ VALUE side, VALUE uplo,
850
+ VALUE trans_a, VALUE diag,
851
+ VALUE m, VALUE n,
852
+ VALUE alpha,
853
+ VALUE a, VALUE lda,
854
+ VALUE b, VALUE ldb)
855
+ {
856
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
857
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
858
+ const int m, const int n, const void* alpha, const void* a,
859
+ const int lda, void* b, const int ldb) = {
860
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
861
+ nm::math::cblas_trsm<float>,
862
+ nm::math::cblas_trsm<double>,
863
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
864
+ nm::math::cblas_trsm<nm::Rational32>,
865
+ nm::math::cblas_trsm<nm::Rational64>,
866
+ nm::math::cblas_trsm<nm::Rational128>,
867
+ nm::math::cblas_trsm<nm::RubyObject>
868
+ };
869
+
870
+ nm::dtype_t dtype = NM_DTYPE(a);
871
+
872
+ if (!ttable[dtype]) {
873
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
874
+ } else {
875
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
876
+ rubyval_to_cval(alpha, dtype, pAlpha);
877
+
878
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
879
+ }
880
+
881
+ return Qtrue;
882
+ }
883
+
884
+
885
+ static VALUE nm_cblas_trmm(VALUE self,
886
+ VALUE order,
887
+ VALUE side, VALUE uplo,
888
+ VALUE trans_a, VALUE diag,
889
+ VALUE m, VALUE n,
890
+ VALUE alpha,
891
+ VALUE a, VALUE lda,
892
+ VALUE b, VALUE ldb)
893
+ {
894
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
895
+ const enum CBLAS_SIDE, const enum CBLAS_UPLO,
896
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
897
+ const int m, const int n, const void* alpha, const void* a,
898
+ const int lda, void* b, const int ldb) = {
899
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
900
+ nm::math::cblas_trmm<float>,
901
+ nm::math::cblas_trmm<double>,
902
+ cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
903
+ */
904
+ /*
905
+ nm::math::cblas_trmm<nm::Rational32>,
906
+ nm::math::cblas_trmm<nm::Rational64>,
907
+ nm::math::cblas_trmm<nm::Rational128>,
908
+ nm::math::cblas_trmm<nm::RubyObject>*/
909
+ /*};
910
+
911
+ nm::dtype_t dtype = NM_DTYPE(a);
912
+
913
+ if (!ttable[dtype]) {
914
+ rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
915
+ } else {
916
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
917
+ rubyval_to_cval(alpha, dtype, pAlpha);
918
+
919
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
920
+ }
921
+
922
+ return b;
923
+ }
924
+
925
+
926
+ static VALUE nm_cblas_syrk(VALUE self,
927
+ VALUE order,
928
+ VALUE uplo,
929
+ VALUE trans,
930
+ VALUE n, VALUE k,
931
+ VALUE alpha,
932
+ VALUE a, VALUE lda,
933
+ VALUE beta,
934
+ VALUE c, VALUE ldc)
935
+ {
936
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
937
+ const int n, const int k, const void* alpha, const void* a,
938
+ const int lda, const void* beta, void* c, const int ldc) = {
939
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
940
+ nm::math::cblas_syrk<float>,
941
+ nm::math::cblas_syrk<double>,
942
+ cblas_csyrk, cblas_zsyrk// call directly, same function signature!
943
+ */
944
+ /*nm::math::cblas_trsm<nm::Rational32>,
945
+ nm::math::cblas_trsm<nm::Rational64>,
946
+ nm::math::cblas_trsm<nm::Rational128>,
947
+ nm::math::cblas_trsm<nm::RubyObject>*/
948
+ /* };
949
+
950
+ nm::dtype_t dtype = NM_DTYPE(a);
951
+
952
+ if (!ttable[dtype]) {
953
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
954
+ } else {
955
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
956
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
957
+ rubyval_to_cval(alpha, dtype, pAlpha);
958
+ rubyval_to_cval(beta, dtype, pBeta);
959
+
960
+ ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
961
+ }
962
+
963
+ return Qtrue;
964
+ }
965
+
966
+
967
+ static VALUE nm_cblas_herk(VALUE self,
968
+ VALUE order,
969
+ VALUE uplo,
970
+ VALUE trans,
971
+ VALUE n, VALUE k,
972
+ VALUE alpha,
973
+ VALUE a, VALUE lda,
974
+ VALUE beta,
975
+ VALUE c, VALUE ldc)
976
+ {
977
+
978
+ nm::dtype_t dtype = NM_DTYPE(a);
979
+
980
+ if (dtype == nm::COMPLEX64) {
981
+ cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
982
+ } else if (dtype == nm::COMPLEX128) {
983
+ cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
984
+ } else
985
+ rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
986
+ return Qtrue;
987
+ }*/
988
+
989
+
990
+ /*
991
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
992
+ *
993
+ * xGESVD computes the singular value decomposition (SVD) of a real
994
+ * M-by-N matrix A, optionally computing the left and/or right singular
995
+ * vectors. The SVD is written
996
+ *
997
+ * A = U * SIGMA * transpose(V)
998
+ *
999
+ * where SIGMA is an M-by-N matrix which is zero except for its
1000
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1001
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1002
+ * are the singular values of A; they are real and non-negative, and
1003
+ * are returned in descending order. The first min(m,n) columns of
1004
+ * U and V are the left and right singular vectors of A.
1005
+ *
1006
+ * Note that the routine returns V**T, not V.
1007
+ */
1008
+ /*
1009
+ static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1010
+ static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
1011
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
1012
+ nm::math::lapack_gesvd<float,float>,
1013
+ nm::math::lapack_gesvd<double,double>,
1014
+ nm::math::lapack_gesvd<nm::Complex64,float>,
1015
+ nm::math::lapack_gesvd<nm::Complex128,double>,
1016
+ NULL, NULL, NULL, NULL // no rationals or Ruby objects
1017
+ };
1018
+
1019
+ nm::dtype_t dtype = NM_DTYPE(a);
1020
+
1021
+
1022
+ if (!gesvd_table[dtype]) {
1023
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1024
+ return Qfalse;
1025
+ } else {
1026
+ int M = FIX2INT(m),
1027
+ N = FIX2INT(n);
1028
+
1029
+ int min_mn = NM_MIN(M,N);
1030
+ int max_mn = NM_MAX(M,N);
1031
+
1032
+ char JOBU = lapack_svd_job_sym(jobu),
1033
+ JOBVT = lapack_svd_job_sym(jobvt);
1034
+
1035
+ // only need rwork for complex matrices
1036
+ int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
1037
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
1038
+ int work_size = FIX2INT(lwork);
1039
+
1040
+ // ignore user argument for lwork if it's too small.
1041
+ work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
1042
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1043
+
1044
+ int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1045
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1046
+ work, work_size, rwork);
1047
+ return INT2FIX(info);
1048
+ }
1049
+ }*/
1050
+
1051
+ /*
1052
+ * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
1053
+ *
1054
+ * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
1055
+ * M-by-N matrix A, optionally computing the left and/or right singular
1056
+ * vectors. The SVD is written
1057
+ *
1058
+ * A = U * SIGMA * transpose(V)
1059
+ *
1060
+ * where SIGMA is an M-by-N matrix which is zero except for its
1061
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1062
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1063
+ * are the singular values of A; they are real and non-negative, and
1064
+ * are returned in descending order. The first min(m,n) columns of
1065
+ * U and V are the left and right singular vectors of A.
1066
+ *
1067
+ * Note that the routine returns V**T, not V.
1068
+ */
1069
+ /*
1070
+ static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1071
+ static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
1072
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
1073
+ nm::math::lapack_gesdd<float,float>,
1074
+ nm::math::lapack_gesdd<double,double>,
1075
+ nm::math::lapack_gesdd<nm::Complex64,float>,
1076
+ nm::math::lapack_gesdd<nm::Complex128,double>,
1077
+ NULL, NULL, NULL, NULL // no rationals or Ruby objects
1078
+ };
1079
+
1080
+ nm::dtype_t dtype = NM_DTYPE(a);
1081
+
1082
+ if (!gesdd_table[dtype]) {
1083
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1084
+ return Qfalse;
1085
+ } else {
1086
+ int M = FIX2INT(m),
1087
+ N = FIX2INT(n);
1088
+
1089
+ int min_mn = NM_MIN(M,N);
1090
+ int max_mn = NM_MAX(M,N);
1091
+
1092
+ char JOBZ = lapack_svd_job_sym(jobz);
1093
+
1094
+ // only need rwork for complex matrices
1095
+ void* rwork = NULL;
1096
+
1097
+ int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
1098
+ if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
1099
+ int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
1100
+ rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
1101
+
1102
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
1103
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
1104
+ else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
1105
+ } else {
1106
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
1107
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
1108
+ else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
1109
+ }
1110
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1111
+ int* iwork = NM_ALLOCA_N(int, 8*min_mn);
1112
+
1113
+ int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1114
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1115
+ work, work_size, iwork, rwork);
1116
+ return INT2FIX(info);
1117
+ }
1118
+ }*/
1119
+
1120
+
1121
+ /*
1122
+ * Function signature conversion for calling CBLAS' geev functions as directly as possible.
1123
+ *
1124
+ * GEEV computes for an N-by-N real nonsymmetric matrix A, the
1125
+ * eigenvalues and, optionally, the left and/or right eigenvectors.
1126
+ *
1127
+ * The right eigenvector v(j) of A satisfies
1128
+ * A * v(j) = lambda(j) * v(j)
1129
+ * where lambda(j) is its eigenvalue.
1130
+ *
1131
+ * The left eigenvector u(j) of A satisfies
1132
+ * u(j)**H * A = lambda(j) * u(j)**H
1133
+ * where u(j)**H denotes the conjugate transpose of u(j).
1134
+ *
1135
+ * The computed eigenvectors are normalized to have Euclidean norm
1136
+ * equal to 1 and largest component real.
1137
+ */
1138
+ /*
1139
+ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
1140
+ static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
1141
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
1142
+ nm::math::lapack_geev<float,float>,
1143
+ nm::math::lapack_geev<double,double>,
1144
+ nm::math::lapack_geev<nm::Complex64,float>,
1145
+ nm::math::lapack_geev<nm::Complex128,double>,
1146
+ NULL, NULL, NULL, NULL // no rationals or Ruby objects
1147
+ };
1148
+
1149
+ nm::dtype_t dtype = NM_DTYPE(a);
1150
+
1151
+
1152
+ if (!geev_table[dtype]) {
1153
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1154
+ return Qfalse;
1155
+ } else {
1156
+ int N = FIX2INT(n);
1157
+
1158
+ char JOBVL = lapack_evd_job_sym(compute_left),
1159
+ JOBVR = lapack_evd_job_sym(compute_right);
1160
+
1161
+ void* A = NM_STORAGE_DENSE(a)->elements;
1162
+ void* WR = NM_STORAGE_DENSE(w)->elements;
1163
+ void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
1164
+ void* VL = NM_STORAGE_DENSE(vl)->elements;
1165
+ void* VR = NM_STORAGE_DENSE(vr)->elements;
1166
+
1167
+ // only need rwork for complex matrices (wi == Qnil for complex)
1168
+ int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
1169
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
1170
+ int work_size = FIX2INT(lwork);
1171
+ void* work;
1172
+
1173
+ int info;
1174
+
1175
+ // if work size is 0 or -1, query.
1176
+ if (work_size <= 0) {
1177
+ work_size = -1;
1178
+ work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
1179
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1180
+ work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
1181
+ // line above is basically: work_size = (int)(work[0]); // now have new work_size
1182
+ NM_FREE(work);
1183
+ if (info == 0)
1184
+ rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
1185
+ else return INT2FIX(info); // error of some kind on query!
1186
+ }
1187
+
1188
+ // if work size is < 2*N, just set it to 2*N
1189
+ if (work_size < 2*N) work_size = 2*N;
1190
+ if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
1191
+ work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
1192
+ }
1193
+
1194
+ // Allocate work array for actual run
1195
+ work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
1196
+
1197
+ // Perform the actual calculation.
1198
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1199
+
1200
+ return INT2FIX(info);
1201
+ }
1202
+ }*/
1203
+
1204
+
1205
+ /*
1206
+ * Based on LAPACK's dscal function, but for any dtype.
1207
+ *
1208
+ * In-place modification; returns the modified vector as well.
1209
+ */
1210
+ /*
1211
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
1212
+ nm::dtype_t dtype = NM_DTYPE(vector);
1213
+
1214
+ void* da = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1215
+ rubyval_to_cval(scale, dtype, da);
1216
+
1217
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
1218
+
1219
+ ttable[dtype](FIX2INT(n), da, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));
1220
+
1221
+ return vector;
1222
+ }
1223
+
1224
+
1225
+ static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1226
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {*/
1227
+ /*nm::math::clapack_lauum<uint8_t, false>,
1228
+ nm::math::clapack_lauum<int8_t, false>,
1229
+ nm::math::clapack_lauum<int16_t, false>,
1230
+ nm::math::clapack_lauum<uint32_t, false>,
1231
+ nm::math::clapack_lauum<uint64_t, false>,*/
1232
+ /*NULL, NULL, NULL, NULL, NULL,
1233
+ nm::math::clapack_lauum<false, float>,
1234
+ nm::math::clapack_lauum<false, double>,
1235
+ #ifdef HAVE_CLAPACK_H
1236
+ clapack_clauum, clapack_zlauum, // call directly, same function signature!
1237
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1238
+ nm::math::clapack_lauum<true, nm::Complex64>,
1239
+ nm::math::clapack_lauum<true, nm::Complex128>,
1240
+ #endif*/
1241
+ /*
1242
+ nm::math::clapack_lauum<nm::Rational32, false>,
1243
+ nm::math::clapack_lauum<nm::Rational64, false>,
1244
+ nm::math::clapack_lauum<nm::Rational128, false>,
1245
+ nm::math::clapack_lauum<nm::RubyObject, false>
1246
+
1247
+ */
1248
+ /*};
1249
+
1250
+ if (!ttable[NM_DTYPE(a)]) {
1251
+ rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
1252
+ } else {
1253
+ // Call either our version of lauum or the LAPACK version.
1254
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1255
+ }
1256
+
1257
+ return a;
1258
+ }*/
1259
+
1260
+
1261
+ /* Call any of the clapack_xgetrf functions as directly as possible.
1262
+ *
1263
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
1264
+ * matrix A using partial pivoting with row interchanges.
1265
+ *
1266
+ * The factorization has the form:
1267
+ * A = P * L * U
1268
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
1269
+ * and U is upper triangular (upper trapezoidal if m < n).
1270
+ *
1271
+ * This is the right-looking level 3 BLAS version of the algorithm.
1272
+ *
1273
+ * == Arguments
1274
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
1275
+ * (You don't need argument 5; this is the value returned by this function.)
1276
+ *
1277
+ * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
1278
+ * with its arguments?
1279
+ *
1280
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1281
+ * handling, so you can easily crash Ruby!
1282
+ *
1283
+ * Returns an array giving the pivot indices (normally these are argument #5).
1284
+ */
1285
+ /*
1286
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
1287
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
1288
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1289
+ nm::math::clapack_getrf<float>,
1290
+ nm::math::clapack_getrf<double>,
1291
+ #ifdef HAVE_CLAPACK_H
1292
+ clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
1293
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1294
+ nm::math::clapack_getrf<nm::Complex64>,
1295
+ nm::math::clapack_getrf<nm::Complex128>,
1296
+ #endif
1297
+ nm::math::clapack_getrf<nm::Rational32>,
1298
+ nm::math::clapack_getrf<nm::Rational64>,
1299
+ nm::math::clapack_getrf<nm::Rational128>,
1300
+ nm::math::clapack_getrf<nm::RubyObject>
1301
+ };
1302
+
1303
+ int M = FIX2INT(m),
1304
+ N = FIX2INT(n);
1305
+
1306
+ // Allocate the pivot index array, which is of size MIN(M, N).
1307
+ size_t ipiv_size = std::min(M,N);
1308
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
1309
+
1310
+ if (!ttable[NM_DTYPE(a)]) {
1311
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1312
+ } else {
1313
+ // Call either our version of getrf or the LAPACK version.
1314
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
1315
+ }
1316
+
1317
+ // Result will be stored in a. We return ipiv as an array.
1318
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
1319
+ for (size_t i = 0; i < ipiv_size; ++i) {
1320
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
1321
+ }
1322
+
1323
+ return ipiv_array;
1324
+ }*/
1325
+
1326
+
1327
+ /* Call any of the clapack_xpotrf functions as directly as possible.
1328
+ *
1329
+ * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
1330
+ * with its arguments?
1331
+ *
1332
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1333
+ * handling, so you can easily crash Ruby!
1334
+ *
1335
+ * Returns an array giving the pivot indices (normally these are argument #5).
1336
+ */
1337
+ /*
1338
+ static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1339
+ #ifndef HAVE_CLAPACK_H
1340
+ rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
1341
+ #endif
1342
+
1343
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1344
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1345
+ nm::math::clapack_potrf<float>,
1346
+ nm::math::clapack_potrf<double>,
1347
+ #ifdef HAVE_CLAPACK_H
1348
+ clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
1349
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1350
+ nm::math::clapack_potrf<nm::Complex64>,
1351
+ nm::math::clapack_potrf<nm::Complex128>,
1352
+ #endif
1353
+ NULL, NULL, NULL, NULL*/ /*
1354
+ nm::math::clapack_potrf<nm::Rational32>,
1355
+ nm::math::clapack_potrf<nm::Rational64>,
1356
+ nm::math::clapack_potrf<nm::Rational128>,
1357
+ nm::math::clapack_potrf<nm::RubyObject> */
1358
+ /*};
1359
+
1360
+ if (!ttable[NM_DTYPE(a)]) {
1361
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1362
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1363
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1364
+ } else {
1365
+ // Call either our version of potrf or the LAPACK version.
1366
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1367
+ }
1368
+
1369
+ return a;
1370
+ }*/
1371
+
1372
+
1373
+ /*
1374
+ * Call any of the clapack_xgetrs functions as directly as possible.
1375
+ */
1376
+ /*
1377
+ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
1378
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
1379
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
1380
+ const int ldb) = {
1381
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1382
+ nm::math::clapack_getrs<float>,
1383
+ nm::math::clapack_getrs<double>,
1384
+ #ifdef HAVE_CLAPACK_H
1385
+ clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
1386
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1387
+ nm::math::clapack_getrs<nm::Complex64>,
1388
+ nm::math::clapack_getrs<nm::Complex128>,
1389
+ #endif
1390
+ nm::math::clapack_getrs<nm::Rational32>,
1391
+ nm::math::clapack_getrs<nm::Rational64>,
1392
+ nm::math::clapack_getrs<nm::Rational128>,
1393
+ nm::math::clapack_getrs<nm::RubyObject>
1394
+ };
1395
+
1396
+ // Allocate the C version of the pivot index array
1397
+ // TODO: Allow for an NVector here also, maybe?
1398
+ int* ipiv_;
1399
+ if (TYPE(ipiv) != T_ARRAY) {
1400
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1401
+ } else {
1402
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1403
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1404
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1405
+ }
1406
+ }
1407
+
1408
+ if (!ttable[NM_DTYPE(a)]) {
1409
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1410
+ } else {
1411
+
1412
+ // Call either our version of getrs or the LAPACK version.
1413
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1414
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1415
+ }
1416
+
1417
+ // b is both returned and modified directly in the argument list.
1418
+ return b;
1419
+ }*/
1420
+
1421
+
1422
+ /*
1423
+ * Call any of the clapack_xpotrs functions as directly as possible.
1424
+ */
1425
+ /*
1426
+ static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
1427
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1428
+ const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
1429
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1430
+ nm::math::clapack_potrs<float,false>,
1431
+ nm::math::clapack_potrs<double,false>,
1432
+ #ifdef HAVE_CLAPACK_H
1433
+ clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
1434
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1435
+ nm::math::clapack_potrs<nm::Complex64,true>,
1436
+ nm::math::clapack_potrs<nm::Complex128,true>,
1437
+ #endif
1438
+ nm::math::clapack_potrs<nm::Rational32,false>,
1439
+ nm::math::clapack_potrs<nm::Rational64,false>,
1440
+ nm::math::clapack_potrs<nm::Rational128,false>,
1441
+ nm::math::clapack_potrs<nm::RubyObject,false>
1442
+ };
1443
+
1444
+
1445
+ if (!ttable[NM_DTYPE(a)]) {
1446
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1447
+ } else {
1448
+
1449
+ // Call either our version of potrs or the LAPACK version.
1450
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1451
+ NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1452
+ }
1453
+
1454
+ // b is both returned and modified directly in the argument list.
1455
+ return b;
1456
+ }*/
1457
+
1458
+
1459
+ /* Call any of the clapack_xgetri functions as directly as possible.
1460
+ *
1461
+ * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
1462
+ * with its arguments?
1463
+ *
1464
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1465
+ * handling, so you can easily crash Ruby!
1466
+ *
1467
+ * Returns an array giving the pivot indices (normally these are argument #5).
1468
+ */
1469
+ /*
1470
+ static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
1471
+ #ifndef HAVE_CLAPACK_H
1472
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1473
+ #endif
1474
+
1475
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
1476
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1477
+ nm::math::clapack_getri<float>,
1478
+ nm::math::clapack_getri<double>,
1479
+ #ifdef HAVE_CLAPACK_H
1480
+ clapack_cgetri, clapack_zgetri, // call directly, same function signature!
1481
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1482
+ nm::math::clapack_getri<nm::Complex64>,
1483
+ nm::math::clapack_getri<nm::Complex128>,
1484
+ #endif
1485
+ NULL, NULL, NULL, NULL*/ /*
1486
+ nm::math::clapack_getri<nm::Rational32>,
1487
+ nm::math::clapack_getri<nm::Rational64>,
1488
+ nm::math::clapack_getri<nm::Rational128>,
1489
+ nm::math::clapack_getri<nm::RubyObject> */
1490
+ /* };
1491
+
1492
+ // Allocate the C version of the pivot index array
1493
+ // TODO: Allow for an NVector here also, maybe?
1494
+ int* ipiv_;
1495
+ if (TYPE(ipiv) != T_ARRAY) {
1496
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1497
+ } else {
1498
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1499
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1500
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1501
+ }
1502
+ }
1503
+
1504
+ if (!ttable[NM_DTYPE(a)]) {
1505
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1506
+ // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
1507
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1508
+ } else {
1509
+ // Call either our version of getri or the LAPACK version.
1510
+ ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
1511
+ }
1512
+
1513
+ return a;
1514
+ }*/
1515
+
1516
+
1517
+ /* Call any of the clapack_xpotri functions as directly as possible.
1518
+ *
1519
+ * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
1520
+ * with its arguments?
1521
+ *
1522
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1523
+ * handling, so you can easily crash Ruby!
1524
+ *
1525
+ * Returns an array giving the pivot indices (normally these are argument #5).
1526
+ */
1527
+ /*
1528
+ static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1529
+ #ifndef HAVE_CLAPACK_H
1530
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1531
+ #endif
1532
+
1533
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1534
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1535
+ nm::math::clapack_potri<float>,
1536
+ nm::math::clapack_potri<double>,
1537
+ #ifdef HAVE_CLAPACK_H
1538
+ clapack_cpotri, clapack_zpotri, // call directly, same function signature!
1539
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1540
+ nm::math::clapack_potri<nm::Complex64>,
1541
+ nm::math::clapack_potri<nm::Complex128>,
1542
+ #endif
1543
+ NULL, NULL, NULL, NULL*/ /*
1544
+ nm::math::clapack_getri<nm::Rational32>,
1545
+ nm::math::clapack_getri<nm::Rational64>,
1546
+ nm::math::clapack_getri<nm::Rational128>,
1547
+ nm::math::clapack_getri<nm::RubyObject> */
1548
+ /*};
1549
+
1550
+ if (!ttable[NM_DTYPE(a)]) {
1551
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1552
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1553
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1554
+ } else {
1555
+ // Call either our version of getri or the LAPACK version.
1556
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1557
+ }
1558
+
1559
+ return a;
1560
+ }*/
1561
+
1562
+
1563
+ /*
1564
+ * Call any of the clapack_xlaswp functions as directly as possible.
1565
+ *
1566
+ * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
1567
+ * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
1568
+ */
1569
+ /*
1570
+ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
1571
+ static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
1572
+ nm::math::clapack_laswp<uint8_t>,
1573
+ nm::math::clapack_laswp<int8_t>,
1574
+ nm::math::clapack_laswp<int16_t>,
1575
+ nm::math::clapack_laswp<int32_t>,
1576
+ nm::math::clapack_laswp<int64_t>,
1577
+ nm::math::clapack_laswp<float>,
1578
+ nm::math::clapack_laswp<double>,
1579
+ //#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
1580
+ // clapack_claswp, clapack_zlaswp, // call directly, same function signature!
1581
+ //#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1582
+ nm::math::clapack_laswp<nm::Complex64>,
1583
+ nm::math::clapack_laswp<nm::Complex128>,
1584
+ //#endif
1585
+ nm::math::clapack_laswp<nm::Rational32>,
1586
+ nm::math::clapack_laswp<nm::Rational64>,
1587
+ nm::math::clapack_laswp<nm::Rational128>,
1588
+ nm::math::clapack_laswp<nm::RubyObject>
1589
+ };
1590
+
1591
+ // Allocate the C version of the pivot index array
1592
+ // TODO: Allow for an NVector here also, maybe?
1593
+ int* ipiv_;
1594
+ if (TYPE(ipiv) != T_ARRAY) {
1595
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1596
+ } else {
1597
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1598
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1599
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1600
+ }
1601
+ }
1602
+
1603
+ // Call either our version of laswp or the LAPACK version.
1604
+ ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
1605
+
1606
+ // a is both returned and modified directly in the argument list.
1607
+ return a;
1608
+ }*/
1609
+
1610
+
1611
+ /*
1612
+ * C accessor for calculating an exact determinant.
1613
+ */
1614
+ /*
1615
+ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) {
1616
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
1617
+
1618
+ ttable[dtype](M, elements, lda, result);
1619
+ }*/
1620
+
1621
+
1622
+ /*
1623
+ * Transpose an array of elements that represent a row-major dense matrix. Does not allocate anything, only does an memcpy.
1624
+ */
1625
+ /*
1626
+ void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
1627
+ for (size_t i = 0; i < N; ++i) {
1628
+ for (size_t j = 0; j < M; ++j) {
1629
+
1630
+ memcpy(reinterpret_cast<char*>(B) + (i*ldb+j)*element_size,
1631
+ reinterpret_cast<const char*>(A) + (j*lda+i)*element_size,
1632
+ element_size);
1633
+
1634
+ }
1635
+ }
1636
+ }*/
1637
+
1638
+
1639
+ } // end of extern "C" block