nmatrix-gemv 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +29 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/Gemfile +7 -0
  6. data/README.md +29 -0
  7. data/Rakefile +225 -0
  8. data/ext/nmatrix_gemv/binary_format.txt +53 -0
  9. data/ext/nmatrix_gemv/data/complex.h +399 -0
  10. data/ext/nmatrix_gemv/data/data.cpp +298 -0
  11. data/ext/nmatrix_gemv/data/data.h +771 -0
  12. data/ext/nmatrix_gemv/data/meta.h +70 -0
  13. data/ext/nmatrix_gemv/data/rational.h +436 -0
  14. data/ext/nmatrix_gemv/data/ruby_object.h +471 -0
  15. data/ext/nmatrix_gemv/extconf.rb +254 -0
  16. data/ext/nmatrix_gemv/math.cpp +1639 -0
  17. data/ext/nmatrix_gemv/math/asum.h +143 -0
  18. data/ext/nmatrix_gemv/math/geev.h +82 -0
  19. data/ext/nmatrix_gemv/math/gemm.h +271 -0
  20. data/ext/nmatrix_gemv/math/gemv.h +212 -0
  21. data/ext/nmatrix_gemv/math/ger.h +96 -0
  22. data/ext/nmatrix_gemv/math/gesdd.h +80 -0
  23. data/ext/nmatrix_gemv/math/gesvd.h +78 -0
  24. data/ext/nmatrix_gemv/math/getf2.h +86 -0
  25. data/ext/nmatrix_gemv/math/getrf.h +240 -0
  26. data/ext/nmatrix_gemv/math/getri.h +108 -0
  27. data/ext/nmatrix_gemv/math/getrs.h +129 -0
  28. data/ext/nmatrix_gemv/math/idamax.h +86 -0
  29. data/ext/nmatrix_gemv/math/inc.h +47 -0
  30. data/ext/nmatrix_gemv/math/laswp.h +165 -0
  31. data/ext/nmatrix_gemv/math/long_dtype.h +52 -0
  32. data/ext/nmatrix_gemv/math/math.h +1069 -0
  33. data/ext/nmatrix_gemv/math/nrm2.h +181 -0
  34. data/ext/nmatrix_gemv/math/potrs.h +129 -0
  35. data/ext/nmatrix_gemv/math/rot.h +141 -0
  36. data/ext/nmatrix_gemv/math/rotg.h +115 -0
  37. data/ext/nmatrix_gemv/math/scal.h +73 -0
  38. data/ext/nmatrix_gemv/math/swap.h +73 -0
  39. data/ext/nmatrix_gemv/math/trsm.h +387 -0
  40. data/ext/nmatrix_gemv/nm_memory.h +60 -0
  41. data/ext/nmatrix_gemv/nmatrix_gemv.cpp +90 -0
  42. data/ext/nmatrix_gemv/nmatrix_gemv.h +374 -0
  43. data/ext/nmatrix_gemv/ruby_constants.cpp +153 -0
  44. data/ext/nmatrix_gemv/ruby_constants.h +107 -0
  45. data/ext/nmatrix_gemv/ruby_nmatrix.c +84 -0
  46. data/ext/nmatrix_gemv/ttable_helper.rb +122 -0
  47. data/ext/nmatrix_gemv/types.h +54 -0
  48. data/ext/nmatrix_gemv/util/util.h +78 -0
  49. data/lib/nmatrix-gemv.rb +43 -0
  50. data/lib/nmatrix_gemv/blas.rb +85 -0
  51. data/lib/nmatrix_gemv/nmatrix_gemv.rb +35 -0
  52. data/lib/nmatrix_gemv/rspec.rb +75 -0
  53. data/nmatrix-gemv.gemspec +31 -0
  54. data/spec/blas_spec.rb +154 -0
  55. data/spec/spec_helper.rb +128 -0
  56. metadata +186 -0
@@ -0,0 +1,254 @@
1
+ # = NMatrix
2
+ #
3
+ # A linear algebra library for scientific computation in Ruby.
4
+ # NMatrix is part of SciRuby.
5
+ #
6
+ # NMatrix was originally inspired by and derived from NArray, by
7
+ # Masahiro Tanaka: http://narray.rubyforge.org
8
+ #
9
+ # == Copyright Information
10
+ #
11
+ # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
12
+ # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
13
+ #
14
+ # Please see LICENSE.txt for additional copyright notices.
15
+ #
16
+ # == Contributing
17
+ #
18
+ # By contributing source code to SciRuby, you agree to be bound by
19
+ # our Contributor Agreement:
20
+ #
21
+ # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
22
+ #
23
+ # == extconf.rb
24
+ #
25
+ # This file checks for ATLAS and other necessary headers, and
26
+ # generates a Makefile for compiling NMatrix.
27
+
28
+ require "mkmf"
29
+
30
+
31
+ # Function derived from NArray's extconf.rb.
32
+ def have_type(type, header=nil) #:nodoc:
33
+ printf "checking for %s... ", type
34
+ STDOUT.flush
35
+
36
+ src = <<"SRC"
37
+ #include <ruby.h>
38
+ SRC
39
+
40
+
41
+ src << <<"SRC" unless header.nil?
42
+ #include <#{header}>
43
+ SRC
44
+
45
+ r = try_link(src + <<"SRC")
46
+ int main() { return 0; }
47
+ int t() { #{type} a; return 0; }
48
+ SRC
49
+
50
+ unless r
51
+ print "no\n"
52
+ return false
53
+ end
54
+
55
+ $defs.push(format("-DHAVE_%s", type.upcase))
56
+
57
+ print "yes\n"
58
+
59
+ return true
60
+ end
61
+
62
+ # Function derived from NArray's extconf.rb.
63
+ def create_conf_h(file) #:nodoc:
64
+ print "creating #{file}\n"
65
+ File.open(file, 'w') do |hfile|
66
+ header_guard = file.upcase.sub(/\s|\./, '_')
67
+
68
+ hfile.puts "#ifndef #{header_guard}"
69
+ hfile.puts "#define #{header_guard}"
70
+ hfile.puts
71
+
72
+ # FIXME: Find a better way to do this:
73
+ hfile.puts "#define RUBY_2 1" if RUBY_VERSION >= '2.0'
74
+ hfile.puts "#define OLD_RB_SCAN_ARGS" if RUBY_VERSION < '1.9.3'
75
+
76
+ for line in $defs
77
+ line =~ /^-D(.*)/
78
+ hfile.printf "#define %s 1\n", $1
79
+ end
80
+
81
+ hfile.puts
82
+ hfile.puts "#endif"
83
+ end
84
+ end
85
+
86
+ if RUBY_VERSION < '1.9'
87
+ raise(NotImplementedError, "Sorry, you need at least Ruby 1.9!")
88
+ else
89
+ $INSTALLFILES = [['nmatrix_gemv.h', '$(archdir)'], ['nmatrix_gemv.hpp', '$(archdir)'], ['nmatrix_gemv_config.h', '$(archdir)']]
90
+ if /cygwin|mingw/ =~ RUBY_PLATFORM
91
+ $INSTALLFILES << ['libnmatrix_gemv.a', '$(archdir)']
92
+ end
93
+ end
94
+
95
+ if /cygwin|mingw/ =~ RUBY_PLATFORM
96
+ CONFIG["DLDFLAGS"] << " --output-lib libnmatrix_gemv.a"
97
+ end
98
+
99
+ $DEBUG = true
100
+ $CFLAGS = ["-Wall -Werror=return-type",$CFLAGS].join(" ")
101
+ $CXXFLAGS = ["-Wall -Werror=return-type",$CXXFLAGS].join(" ")
102
+ $CPPFLAGS = ["-Wall -Werror=return-type",$CPPFLAGS].join(" ")
103
+
104
+ # When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb.
105
+ basenames = %w{nmatrix_gemv ruby_constants data/data math }
106
+ $objs = basenames.map { |b| "#{b}.o" }
107
+ $srcs = basenames.map { |b| "#{b}.cpp" }
108
+
109
+ #CONFIG['CXX'] = 'clang++'
110
+ CONFIG['CXX'] = 'g++'
111
+
112
+ def find_newer_gplusplus #:nodoc:
113
+ print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
114
+ [9,8,7,6,5,4,3].each do |minor|
115
+ ver = "4.#{minor}"
116
+ gpp = "g++-#{ver}"
117
+ result = `which #{gpp}`
118
+ next if result.empty?
119
+ CONFIG['CXX'] = gpp
120
+ puts ver
121
+ return CONFIG['CXX']
122
+ end
123
+ false
124
+ end
125
+
126
+ def gplusplus_version #:nodoc:
127
+ cxxvar = proc { |n| `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{n}`.chomp.split(' ')[2] }
128
+ major = cxxvar.call('__GNUC__')
129
+ minor = cxxvar.call('__GNUC_MINOR__')
130
+ patch = cxxvar.call('__GNUC_PATCHLEVEL__')
131
+
132
+ raise("unable to determine g++ version (match to get version was nil)") if major.nil? || minor.nil? || patch.nil?
133
+
134
+ "#{major}.#{minor}.#{patch}"
135
+ end
136
+
137
+
138
+ if CONFIG['CXX'] == 'clang++'
139
+ $CPP_STANDARD = 'c++11'
140
+
141
+ else
142
+ version = gplusplus_version
143
+ if version < '4.3.0' && CONFIG['CXX'] == 'g++' # see if we can find a newer G++, unless it's been overridden by user
144
+ if !find_newer_gplusplus
145
+ raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
146
+ end
147
+ version = gplusplus_version
148
+ end
149
+
150
+ if version < '4.7.0'
151
+ $CPP_STANDARD = 'c++0x'
152
+ else
153
+ $CPP_STANDARD = 'c++11'
154
+ end
155
+ puts "using C++ standard... #{$CPP_STANDARD}"
156
+ puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
157
+ end
158
+
159
+ # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
160
+
161
+ # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
162
+ # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
163
+ # for ATLAS. The same for all the others
164
+ #
165
+ #dir_config("clapack", ["/usr/local/atlas/include"], [])
166
+ #
167
+ #
168
+
169
+ # Is g++ having trouble finding your header files?
170
+ # Try this:
171
+ # export C_INCLUDE_PATH=/usr/local/atlas/include
172
+ # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
173
+ # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
174
+
175
+ idefaults = {lapack: ["/usr/include/atlas"],
176
+ cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
177
+ atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
178
+
179
+ # For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
180
+ # exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
181
+ # that Dir.exists?(d) for each.
182
+ ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
183
+ cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
184
+ atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
185
+
186
+ if have_library("clapack") # Usually only applies for Mac OS X
187
+ $libs += " -lclapack "
188
+ end
189
+
190
+ unless have_library("lapack")
191
+ dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
192
+ end
193
+
194
+ unless have_library("cblas")
195
+ dir_config("cblas", idefaults[:cblas], ldefaults[:cblas])
196
+ end
197
+
198
+ unless have_library("atlas")
199
+ dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
200
+ end
201
+
202
+ # If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
203
+ # we try our luck with the default location.
204
+ if have_header("atlas/cblas.h")
205
+ have_header("atlas/clapack.h")
206
+ else
207
+ have_header("cblas.h")
208
+ have_header("clapack.h")
209
+ end
210
+
211
+
212
+ have_func("clapack_dgetrf", ["cblas.h", "clapack.h"])
213
+ have_func("clapack_dgetri", ["cblas.h", "clapack.h"])
214
+ have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
215
+
216
+ have_func("cblas_dgemm", "cblas.h")
217
+
218
+ #have_func("rb_scan_args", "ruby.h")
219
+
220
+ #find_library("lapack", "clapack_dgetrf")
221
+ #find_library("cblas", "cblas_dgemm")
222
+ #find_library("atlas", "ATL_dgemmNN")
223
+ # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
224
+ $libs += " -llapack -lcblas -latlas "
225
+ #$libs += " -lprofiler "
226
+
227
+
228
+ # For release, these next two should both be changed to -O3.
229
+ $CFLAGS += " -O3 -g" #" -O0 -g "
230
+ #$CFLAGS += " -static -O0 -g "
231
+ $CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} -g" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
232
+ #$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
233
+
234
+ CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
235
+ CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '')
236
+ CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
237
+
238
+ create_conf_h("nmatrix_gemv_config.h")
239
+ create_makefile("nmatrix_gemv")
240
+
241
+ Dir.mkdir("data") unless Dir.exists?("data")
242
+ #Dir.mkdir("util") unless Dir.exists?("util")
243
+ #Dir.mkdir("storage") unless Dir.exists?("storage")
244
+ #Dir.chdir("storage") do
245
+ # Dir.mkdir("yale") unless Dir.exists?("yale")
246
+ # Dir.mkdir("list") unless Dir.exists?("list")
247
+ # Dir.mkdir("dense") unless Dir.exists?("dense")
248
+ #end
249
+
250
+ # to clean up object files in subdirectories:
251
+ open('Makefile', 'a') do |f|
252
+ clean_objs_paths = %w{data}.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" }
253
+ f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}")
254
+ end
@@ -0,0 +1,1639 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math.cpp
25
+ //
26
+ // Ruby-exposed BLAS functions.
27
+ //
28
+ // === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
29
+ //
30
+ // This procedure is written as if for a fictional function with double
31
+ // version dbacon, which we'll say is from LAPACK.
32
+ //
33
+ // 1. Write a default templated version which probably returns a boolean.
34
+ // Call it bacon, and put it in math.h.
35
+ //
36
+ // Order will always be row-major, so we don't need to pass that.
37
+ // CBLAS_TRANSPOSE-type arguments, however, should be passed.
38
+ //
39
+ // Otherwise, arguments should look like those in cblas.h or clapack.h:
40
+ //
41
+ // template <typename DType>
42
+ // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
43
+ // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
44
+ // }
45
+ //
46
+ // 2. In math.cpp, add a templated inline static version of the function which takes
47
+ // only void* pointers and uses reinterpret_cast to convert them to the
48
+ // proper dtype.
49
+ //
50
+ // This function may also need to switch m and n if these arguments are given.
51
+ //
52
+ // For an example, see cblas_gemm. This function should do nothing other than cast
53
+ // appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
54
+ // all take void* only, and no other pointers that vary between functions, you can skip
55
+ // this particular step -- as we can call them directly using a custom function pointer
56
+ // array (same function signature!).
57
+ //
58
+ // This version of the function will be the one exposed through NMatrix::LAPACK. We
59
+ // want it to be as close to the actual LAPACK version of the function as possible,
60
+ // and with as few checks as possible.
61
+ //
62
+ // You will probably need a forward declaration in the extern "C" block.
63
+ //
64
+ // Note: In that case, the function you wrote in Step 1 should also take exactly the
65
+ // same arguments as clapack_xbacon. Otherwise Bad Things will happen.
66
+ //
67
+ // 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
68
+ //
69
+ // You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
70
+ // Or you may prefer to do it by hand:
71
+ //
72
+ // template <>
73
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
74
+ // clapack_sbacon(trans, M, N, A, ...);
75
+ // return true;
76
+ // }
77
+ //
78
+ // Make sure these functions are in the namespace nm::math.
79
+ //
80
+ // Note that you should do everything in your power here to parse any return values
81
+ // clapack_sbacon may give you. We're not trying very hard in this example, but you might
82
+ // look at getrf to see how it might be done.
83
+ //
84
+ // 4. Expose the function in nm_math_init_blas(), in math.cpp:
85
+ //
86
+ // rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
87
+ //
88
+ // Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
89
+ //
90
+ // 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
91
+ // sanity checks we left out in step 2.
92
+ //
93
+ // 6. Write tests for NMatrix::LAPACK::getrf, confirming that it works for the ATLAS dtypes.
94
+ //
95
+ // 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
96
+ // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
97
+ // inserts a lot of unnecessary stuff.
98
+ //
99
+ // Copy and paste the output into the default templated function you wrote in Step 1.
100
+ // Fix it so it works as a template instead of just for doubles.
101
+ //
102
+ // 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
103
+ //
104
+ // 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
105
+ // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
106
+ //
107
+ // 10. Pull request!
108
+
109
+
110
+
111
+ /*
112
+ * Project Includes
113
+ */
114
+
115
+
116
+ #include <algorithm>
117
+ #include <limits>
118
+
119
+
120
+ //#include "math/inc.h"
121
+ #include "data/data.h"
122
+ /*
123
+ #include "math/gesdd.h"
124
+ #include "math/gesvd.h"
125
+ #include "math/geev.h"
126
+ #include "math/swap.h"
127
+ #include "math/idamax.h"
128
+ #include "math/scal.h"
129
+ #include "math/ger.h"
130
+ #include "math/getf2.h"
131
+ #include "math/laswp.h"
132
+ #include "math/trsm.h"
133
+ */
134
+ #include "math/long_dtype.h" // for gemm.h
135
+ //#include "math/gemm.h"
136
+ #include "math/gemv.h"
137
+ /*
138
+ #include "math/asum.h"
139
+ #include "math/nrm2.h"
140
+ #include "math/getrf.h"
141
+ #include "math/getri.h"
142
+ #include "math/getrs.h"
143
+ #include "math/potrs.h"
144
+ #include "math/rot.h"
145
+ #include "math/rotg.h"
146
+ */
147
+ //#include "math/math.h"
148
+ //#include "storage/dense/dense.h"
149
+
150
+ #include "nmatrix_gemv.h"
151
+ #include "ruby_constants.h"
152
+
153
+ /*
154
+ * Forward Declarations
155
+ */
156
+
157
+ extern "C" {
158
+ #if defined HAVE_CLAPACK_H
159
+ #include <clapack.h>
160
+ #elif defined HAVE_ATLAS_CLAPACK_H
161
+ #include <atlas/clapack.h>
162
+ #endif
163
+
164
+ /*
165
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
166
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
167
+ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
168
+ static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
169
+
170
+ static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
171
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
172
+ */
173
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
174
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
175
+ /*
176
+ static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
177
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
178
+ static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
179
+ VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
180
+ static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
181
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
182
+ static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
183
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
184
+
185
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
186
+ static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
187
+ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
188
+ static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
189
+ static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
190
+ static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
191
+ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
192
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
193
+ static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
194
+
195
+ static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
196
+ static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
197
+ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
198
+ */
199
+ } // end of extern "C" block
200
+
201
+ ////////////////////
202
+ // Math Functions //
203
+ ////////////////////
204
+
205
+ namespace nm { namespace math {
206
+
207
+ /*
208
+ * Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
209
+ */
210
+
211
+ /*
212
+ template <typename DType>
213
+ void det_exact(const int M, const void* A_elements, const int lda, void* result_arg) {
214
+ DType* result = reinterpret_cast<DType*>(result_arg);
215
+ const DType* A = reinterpret_cast<const DType*>(A_elements);
216
+
217
+ typename LongDType<DType>::type x, y;
218
+
219
+ if (M == 2) {
220
+ *result = A[0] * A[lda+1] - A[1] * A[lda];
221
+
222
+ } else if (M == 3) {
223
+ x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
224
+ y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
225
+ x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di)
226
+
227
+ y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
228
+ *result = A[2]*y + x; // c*(dh-eg) + _
229
+ } else if (M < 2) {
230
+ rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
231
+ } else {
232
+ rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
233
+ }
234
+ }*/
235
+
236
+
237
+ /*
238
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
239
+ */
240
+ /*
241
+ template <typename DType, typename CType>
242
+ inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
243
+ return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
244
+ }
245
+ */
246
+
247
+ /*
248
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
249
+ */
250
+ /*
251
+ template <typename DType, typename CType>
252
+ inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
253
+ return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<DType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
254
+ }
255
+ */
256
+
257
+ /*
258
+ * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
259
+ *
260
+ * For documentation: http://www.netlib.org/blas/dgemm.f
261
+ */
262
+ /*
263
+ template <typename DType>
264
+ inline static void cblas_gemm(const enum CBLAS_ORDER order,
265
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
266
+ int m, int n, int k,
267
+ void* alpha,
268
+ void* a, int lda,
269
+ void* b, int ldb,
270
+ void* beta,
271
+ void* c, int ldc)
272
+ {
273
+ gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
274
+ reinterpret_cast<DType*>(a), lda,
275
+ reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
276
+ reinterpret_cast<DType*>(c), ldc);
277
+ }*/
278
+
279
+
280
+ /*
281
+ * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
282
+ *
283
+ * For documentation: http://www.netlib.org/lapack/double/dgetrf.f
284
+ */
285
+ template <typename DType>
286
+ inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans,
287
+ const int m, const int n,
288
+ const void* alpha,
289
+ const void* a, const int lda,
290
+ const void* x, const int incx,
291
+ const void* beta,
292
+ void* y, const int incy)
293
+ {
294
+ return gemv<DType>(trans,
295
+ m, n, reinterpret_cast<const DType*>(alpha),
296
+ reinterpret_cast<const DType*>(a), lda,
297
+ reinterpret_cast<const DType*>(x), incx, reinterpret_cast<const DType*>(beta),
298
+ reinterpret_cast<DType*>(y), incy);
299
+ }
300
+
301
+
302
+ /*
303
+ * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
304
+ *
305
+ * For documentation: http://www.netlib.org/blas/dtrsm.f
306
+ */
307
+ /*
308
+ template <typename DType>
309
+ inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
310
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
311
+ const int m, const int n, const void* alpha, const void* a,
312
+ const int lda, void* b, const int ldb)
313
+ {
314
+ trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
315
+ reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
316
+ }*/
317
+
318
+
319
+ /*
320
+ * Function signature conversion for calling CBLAS' trmm functions as directly as possible.
321
+ *
322
+ * For documentation: http://www.netlib.org/blas/dtrmm.f
323
+ */
324
+ /*
325
+ template <typename DType>
326
+ inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
327
+ const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
328
+ const void* A, const int lda, void* B, const int ldb)
329
+ {
330
+ trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
331
+ reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
332
+ }*/
333
+
334
+
335
+ /*
336
+ * Function signature conversion for calling CBLAS' syrk functions as directly as possible.
337
+ *
338
+ * For documentation: http://www.netlib.org/blas/dsyrk.f
339
+ */
340
+ /*
341
+ template <typename DType>
342
+ inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
343
+ const int n, const int k, const void* alpha,
344
+ const void* A, const int lda, const void* beta, void* C, const int ldc)
345
+ {
346
+ syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
347
+ reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
348
+ }*/
349
+
350
+
351
+
352
+
353
+ }} // end of namespace nm::math
354
+
355
+
356
+ extern "C" {
357
+
358
+ ///////////////////
359
+ // Ruby Bindings //
360
+ ///////////////////
361
+
362
+ void nm_math_init_blas() {
363
+
364
+ //cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
365
+
366
+ /* ATLAS-CLAPACK Functions */
367
+ /*
368
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
369
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
370
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
371
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
372
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
373
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
374
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
375
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
376
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
377
+ */
378
+
379
+ /* Non-ATLAS regular LAPACK Functions called via Fortran interface */
380
+ /*
381
+ rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesvd", (METHOD)nm_lapack_gesvd, 12);
382
+ rb_define_singleton_method(cNMatrix_LAPACK, "lapack_gesdd", (METHOD)nm_lapack_gesdd, 11);
383
+ rb_define_singleton_method(cNMatrix_LAPACK, "lapack_geev", (METHOD)nm_lapack_geev, 12);
384
+ */
385
+
386
+ cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
387
+
388
+ /*
389
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
390
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
391
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
392
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
393
+
394
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
395
+ */
396
+ //rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
397
+
398
+ rb_define_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
399
+ /*
400
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
401
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
402
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
403
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
404
+ */
405
+ }
406
+
407
+ /*
408
+ * Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N.
409
+ *
410
+ * Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition.
411
+ */
412
+ /*
413
+ static inline char lapack_svd_job_sym(VALUE op) {
414
+ if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A';
415
+ else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S';
416
+ else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O';
417
+ else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N';
418
+ else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)");
419
+ return 'a';
420
+ }
421
+ */
422
+
423
+
424
+ /*
425
+ * Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V.
426
+ *
427
+ * Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition.
428
+ */
429
+ /*
430
+ static inline char lapack_evd_job_sym(VALUE op) {
431
+ if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N';
432
+ else return 'V';
433
+ }
434
+ */
435
+
436
+
437
+ /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
438
+ * into an enum recognized by cblas.
439
+ *
440
+ * Called by nm_cblas_gemm -- basically inline.
441
+ *
442
+ */
443
+ static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
444
+ if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
445
+ else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
446
+ else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
447
+ else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
448
+ return CblasNoTrans;
449
+ }
450
+
451
+
452
+ /*
453
+ * Interprets cblas argument which could be :left or :right
454
+ *
455
+ * Called by nm_cblas_trsm -- basically inline
456
+ */
457
+ /*
458
+ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
459
+ ID op_id = rb_to_id(op);
460
+ if (op_id == nm_rb_left) return CblasLeft;
461
+ if (op_id == nm_rb_right) return CblasRight;
462
+ rb_raise(rb_eArgError, "Expected :left or :right for side argument");
463
+ return CblasLeft;
464
+ }*/
465
+
466
+ /*
467
+ * Interprets cblas argument which could be :upper or :lower
468
+ *
469
+ * Called by nm_cblas_trsm -- basically inline
470
+ */
471
+ /*
472
+ static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
473
+ ID op_id = rb_to_id(op);
474
+ if (op_id == nm_rb_upper) return CblasUpper;
475
+ if (op_id == nm_rb_lower) return CblasLower;
476
+ rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
477
+ return CblasUpper;
478
+ }*/
479
+
480
+
481
+ /*
482
+ * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
483
+ *
484
+ * Called by nm_cblas_trsm -- basically inline
485
+ */
486
+ /*
487
+ static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
488
+ if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
489
+ return CblasNonUnit;
490
+ }*/
491
+
492
+ /*
493
+ * Interprets cblas argument which could be :row or :col
494
+ */
495
+ /*
496
+ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
497
+ if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
498
+ else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
499
+ rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
500
+ rb_raise(rb_eArgError, "Expected :row or :col for order argument");
501
+ return CblasRowMajor;
502
+ }*/
503
+
504
+
505
+ /*
506
+ * Call any of the cblas_xrotg functions as directly as possible.
507
+ *
508
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
509
+ *
510
+ * | c s | | a | | r |
511
+ * | -s c | * | b | = | 0 |
512
+ *
513
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
514
+ *
515
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
516
+ *
517
+ * This function differs from most of the other raw BLAS accessors. Instead of providing a, b, c, s as arguments, you
518
+ * should only provide a and b (the inputs), and you should provide them as a single NVector (or the first two elements
519
+ * of any dense NMatrix or NVector type, specifically).
520
+ *
521
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input NVector will also be modified in-place.
522
+ *
523
+ * If you provide rationals, be aware that there's a high probability of an error, since rotg includes a square root --
524
+ * and most rationals' square roots are irrational. You're better off converting to Float first.
525
+ *
526
+ * This function, like the other cblas_ functions, does minimal type-checking.
527
+ */
528
+ /*
529
+ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
530
+ static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
531
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
532
+ nm::math::cblas_rotg<float>,
533
+ nm::math::cblas_rotg<double>,
534
+ nm::math::cblas_rotg<nm::Complex64>,
535
+ nm::math::cblas_rotg<nm::Complex128>,
536
+ NULL, NULL, NULL, // no rationals
537
+ NULL //nm::math::cblas_rotg<nm::RubyObject>
538
+ };
539
+
540
+ nm::dtype_t dtype = NM_DTYPE(ab);
541
+
542
+ if (!ttable[dtype]) {
543
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
544
+ return Qnil;
545
+
546
+ } else {
547
+ NM_CONSERVATIVE(nm_register_value(self));
548
+ NM_CONSERVATIVE(nm_register_value(ab));
549
+ void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
550
+ *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
551
+
552
+ // extract A and B from the NVector (first two elements)
553
+ void* pA = NM_STORAGE_DENSE(ab)->elements;
554
+ void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
555
+ // c and s are output
556
+
557
+ ttable[dtype](pA, pB, pC, pS);
558
+
559
+ VALUE result = rb_ary_new2(2);
560
+
561
+ if (dtype == nm::RUBYOBJ) {
562
+ rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
563
+ rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
564
+ } else {
565
+ rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
566
+ rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
567
+ }
568
+ NM_CONSERVATIVE(nm_unregister_value(ab));
569
+ NM_CONSERVATIVE(nm_unregister_value(self));
570
+ return result;
571
+ }
572
+ }*/
573
+
574
+
575
+ /*
576
+ * Call any of the cblas_xrot functions as directly as possible.
577
+ *
578
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
579
+ *
580
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
581
+ * * n :: number of elements to consider in x and y
582
+ * * x :: a vector (expects an NVector)
583
+ * * incx :: stride of x
584
+ * * y :: a vector (expects an NVector)
585
+ * * incy :: stride of y
586
+ * * c :: cosine of the angle of rotation
587
+ * * s :: sine of the angle of rotation
588
+ *
589
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
590
+ * will be float for Complex64 or double for Complex128.
591
+ *
592
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
593
+ * with its arguments?
594
+ *
595
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
596
+ * handling, so you can easily crash Ruby!
597
+ */
598
+ /*
599
+ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
600
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
601
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
602
+ nm::math::cblas_rot<float,float>,
603
+ nm::math::cblas_rot<double,double>,
604
+ nm::math::cblas_rot<nm::Complex64,float>,
605
+ nm::math::cblas_rot<nm::Complex128,double>,
606
+ nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
607
+ nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
608
+ nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
609
+ nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
610
+ };
611
+
612
+ nm::dtype_t dtype = NM_DTYPE(x);
613
+
614
+
615
+ if (!ttable[dtype]) {
616
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
617
+ return Qfalse;
618
+ } else {
619
+ void *pC, *pS;
620
+
621
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
622
+ if (dtype == nm::COMPLEX64) {
623
+ pC = NM_ALLOCA_N(float,1);
624
+ pS = NM_ALLOCA_N(float,1);
625
+ rubyval_to_cval(c, nm::FLOAT32, pC);
626
+ rubyval_to_cval(s, nm::FLOAT32, pS);
627
+ } else if (dtype == nm::COMPLEX128) {
628
+ pC = NM_ALLOCA_N(double,1);
629
+ pS = NM_ALLOCA_N(double,1);
630
+ rubyval_to_cval(c, nm::FLOAT64, pC);
631
+ rubyval_to_cval(s, nm::FLOAT64, pS);
632
+ } else {
633
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
634
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
635
+ rubyval_to_cval(c, dtype, pC);
636
+ rubyval_to_cval(s, dtype, pS);
637
+ }
638
+
639
+
640
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
641
+
642
+ return Qtrue;
643
+ }
644
+ }*/
645
+
646
+
647
+ /*
648
+ * Call any of the cblas_xnrm2 functions as directly as possible.
649
+ *
650
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
651
+ *
652
+ * Arguments:
653
+ * * n :: length of x, must be at least 0
654
+ * * x :: pointer to first entry of input vector
655
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
656
+ *
657
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
658
+ * with its arguments?
659
+ *
660
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
661
+ * handling, so you can easily crash Ruby!
662
+ */
663
+ /*
664
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
665
+
666
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {*/
667
+ /* nm::math::cblas_nrm2<uint8_t,uint8_t>,
668
+ nm::math::cblas_nrm2<int8_t,int8_t>,
669
+ nm::math::cblas_nrm2<int16_t,int16_t>,
670
+ nm::math::cblas_nrm2<int32_t,int32_t>, */
671
+ /*NULL, NULL, NULL, NULL, NULL, // no help for integers
672
+ nm::math::cblas_nrm2<float32_t,float32_t>,
673
+ nm::math::cblas_nrm2<float64_t,float64_t>,
674
+ nm::math::cblas_nrm2<float32_t,nm::Complex64>,
675
+ nm::math::cblas_nrm2<float64_t,nm::Complex128>,
676
+ //nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
677
+ //nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
678
+ //nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
679
+ NULL, NULL, NULL,
680
+ nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
681
+ };
682
+
683
+ nm::dtype_t dtype = NM_DTYPE(x);
684
+
685
+ if (!ttable[dtype]) {
686
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer and rational vectors");
687
+ return Qnil;
688
+
689
+ } else {
690
+ // Determine the return dtype and allocate it
691
+ nm::dtype_t rdtype = dtype;
692
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
693
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
694
+
695
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
696
+
697
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
698
+
699
+ return rubyobj_from_cval(Result, rdtype).rval;
700
+ }
701
+ }*/
702
+
703
+
704
+
705
+ /*
706
+ * Call any of the cblas_xasum functions as directly as possible.
707
+ *
708
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
709
+ * of a vector x.
710
+ *
711
+ * Arguments:
712
+ * * n :: length of x, must be at least 0
713
+ * * x :: pointer to first entry of input vector
714
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
715
+ *
716
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
717
+ * with its arguments?
718
+ *
719
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
720
+ * handling, so you can easily crash Ruby!
721
+ */
722
+ /*
723
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
724
+
725
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
726
+ nm::math::cblas_asum<uint8_t,uint8_t>,
727
+ nm::math::cblas_asum<int8_t,int8_t>,
728
+ nm::math::cblas_asum<int16_t,int16_t>,
729
+ nm::math::cblas_asum<int32_t,int32_t>,
730
+ nm::math::cblas_asum<int64_t,int64_t>,
731
+ nm::math::cblas_asum<float32_t,float32_t>,
732
+ nm::math::cblas_asum<float64_t,float64_t>,
733
+ nm::math::cblas_asum<float32_t,nm::Complex64>,
734
+ nm::math::cblas_asum<float64_t,nm::Complex128>,
735
+ nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
736
+ nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
737
+ nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
738
+ nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
739
+ };
740
+
741
+ nm::dtype_t dtype = NM_DTYPE(x);
742
+
743
+ // Determine the return dtype and allocate it
744
+ nm::dtype_t rdtype = dtype;
745
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
746
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
747
+
748
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
749
+
750
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
751
+
752
+ return rubyobj_from_cval(Result, rdtype).rval;
753
+ }*/
754
+
755
+
756
+ /* Call any of the cblas_xgemm functions as directly as possible.
757
+ *
758
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
759
+ *
760
+ * C = alpha*op(A)*op(B) + beta*C
761
+ *
762
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
763
+ *
764
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
765
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
766
+ * expose the ultra-optimized ATLAS versions.
767
+ *
768
+ * == Arguments
769
+ * See: http://www.netlib.org/blas/dgemm.f
770
+ *
771
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
772
+ * with its arguments?
773
+ *
774
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
775
+ * handling, so you can easily crash Ruby!
776
+ */
777
+ /*
778
+ static VALUE nm_cblas_gemm(VALUE self,
779
+ VALUE order,
780
+ VALUE trans_a, VALUE trans_b,
781
+ VALUE m, VALUE n, VALUE k,
782
+ VALUE alpha,
783
+ VALUE a, VALUE lda,
784
+ VALUE b, VALUE ldb,
785
+ VALUE beta,
786
+ VALUE c, VALUE ldc)
787
+ {
788
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
789
+
790
+ nm::dtype_t dtype = NM_DTYPE(a);
791
+
792
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
793
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
794
+ rubyval_to_cval(alpha, dtype, pAlpha);
795
+ rubyval_to_cval(beta, dtype, pBeta);
796
+
797
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
798
+
799
+ return c;
800
+ }*/
801
+
802
+
803
+ /* Call any of the cblas_xgemv functions as directly as possible.
804
+ *
805
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
806
+ *
807
+ * y = alpha*op(A)*x + beta*y
808
+ *
809
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
810
+ *
811
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
812
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
813
+ * expose the ultra-optimized ATLAS versions.
814
+ *
815
+ * == Arguments
816
+ * See: http://www.netlib.org/blas/dgemm.f
817
+ *
818
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
819
+ * with its arguments?
820
+ *
821
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
822
+ * handling, so you can easily crash Ruby!
823
+ */
824
+ static VALUE nm_cblas_gemv(VALUE self,
825
+ VALUE trans_a,
826
+ VALUE m, VALUE n,
827
+ VALUE alpha,
828
+ VALUE a, VALUE lda,
829
+ VALUE x, VALUE incx,
830
+ VALUE beta,
831
+ VALUE y, VALUE incy)
832
+ {
833
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
834
+
835
+ nm::dtype_t dtype = NM_DTYPE(a);
836
+
837
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
838
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
839
+ rubyval_to_cval(alpha, dtype, pAlpha);
840
+ rubyval_to_cval(beta, dtype, pBeta);
841
+
842
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
843
+ }
844
+
845
+
846
+ /*
847
+ static VALUE nm_cblas_trsm(VALUE self,
848
+ VALUE order,
849
+ VALUE side, VALUE uplo,
850
+ VALUE trans_a, VALUE diag,
851
+ VALUE m, VALUE n,
852
+ VALUE alpha,
853
+ VALUE a, VALUE lda,
854
+ VALUE b, VALUE ldb)
855
+ {
856
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
857
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
858
+ const int m, const int n, const void* alpha, const void* a,
859
+ const int lda, void* b, const int ldb) = {
860
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
861
+ nm::math::cblas_trsm<float>,
862
+ nm::math::cblas_trsm<double>,
863
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
864
+ nm::math::cblas_trsm<nm::Rational32>,
865
+ nm::math::cblas_trsm<nm::Rational64>,
866
+ nm::math::cblas_trsm<nm::Rational128>,
867
+ nm::math::cblas_trsm<nm::RubyObject>
868
+ };
869
+
870
+ nm::dtype_t dtype = NM_DTYPE(a);
871
+
872
+ if (!ttable[dtype]) {
873
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
874
+ } else {
875
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
876
+ rubyval_to_cval(alpha, dtype, pAlpha);
877
+
878
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
879
+ }
880
+
881
+ return Qtrue;
882
+ }
883
+
884
+
885
+ static VALUE nm_cblas_trmm(VALUE self,
886
+ VALUE order,
887
+ VALUE side, VALUE uplo,
888
+ VALUE trans_a, VALUE diag,
889
+ VALUE m, VALUE n,
890
+ VALUE alpha,
891
+ VALUE a, VALUE lda,
892
+ VALUE b, VALUE ldb)
893
+ {
894
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
895
+ const enum CBLAS_SIDE, const enum CBLAS_UPLO,
896
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
897
+ const int m, const int n, const void* alpha, const void* a,
898
+ const int lda, void* b, const int ldb) = {
899
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
900
+ nm::math::cblas_trmm<float>,
901
+ nm::math::cblas_trmm<double>,
902
+ cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
903
+ */
904
+ /*
905
+ nm::math::cblas_trmm<nm::Rational32>,
906
+ nm::math::cblas_trmm<nm::Rational64>,
907
+ nm::math::cblas_trmm<nm::Rational128>,
908
+ nm::math::cblas_trmm<nm::RubyObject>*/
909
+ /*};
910
+
911
+ nm::dtype_t dtype = NM_DTYPE(a);
912
+
913
+ if (!ttable[dtype]) {
914
+ rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
915
+ } else {
916
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
917
+ rubyval_to_cval(alpha, dtype, pAlpha);
918
+
919
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
920
+ }
921
+
922
+ return b;
923
+ }
924
+
925
+
926
+ static VALUE nm_cblas_syrk(VALUE self,
927
+ VALUE order,
928
+ VALUE uplo,
929
+ VALUE trans,
930
+ VALUE n, VALUE k,
931
+ VALUE alpha,
932
+ VALUE a, VALUE lda,
933
+ VALUE beta,
934
+ VALUE c, VALUE ldc)
935
+ {
936
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
937
+ const int n, const int k, const void* alpha, const void* a,
938
+ const int lda, const void* beta, void* c, const int ldc) = {
939
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
940
+ nm::math::cblas_syrk<float>,
941
+ nm::math::cblas_syrk<double>,
942
+ cblas_csyrk, cblas_zsyrk// call directly, same function signature!
943
+ */
944
+ /*nm::math::cblas_trsm<nm::Rational32>,
945
+ nm::math::cblas_trsm<nm::Rational64>,
946
+ nm::math::cblas_trsm<nm::Rational128>,
947
+ nm::math::cblas_trsm<nm::RubyObject>*/
948
+ /* };
949
+
950
+ nm::dtype_t dtype = NM_DTYPE(a);
951
+
952
+ if (!ttable[dtype]) {
953
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
954
+ } else {
955
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
956
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
957
+ rubyval_to_cval(alpha, dtype, pAlpha);
958
+ rubyval_to_cval(beta, dtype, pBeta);
959
+
960
+ ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
961
+ }
962
+
963
+ return Qtrue;
964
+ }
965
+
966
+
967
+ static VALUE nm_cblas_herk(VALUE self,
968
+ VALUE order,
969
+ VALUE uplo,
970
+ VALUE trans,
971
+ VALUE n, VALUE k,
972
+ VALUE alpha,
973
+ VALUE a, VALUE lda,
974
+ VALUE beta,
975
+ VALUE c, VALUE ldc)
976
+ {
977
+
978
+ nm::dtype_t dtype = NM_DTYPE(a);
979
+
980
+ if (dtype == nm::COMPLEX64) {
981
+ cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
982
+ } else if (dtype == nm::COMPLEX128) {
983
+ cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
984
+ } else
985
+ rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
986
+ return Qtrue;
987
+ }*/
988
+
989
+
990
+ /*
991
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
992
+ *
993
+ * xGESVD computes the singular value decomposition (SVD) of a real
994
+ * M-by-N matrix A, optionally computing the left and/or right singular
995
+ * vectors. The SVD is written
996
+ *
997
+ * A = U * SIGMA * transpose(V)
998
+ *
999
+ * where SIGMA is an M-by-N matrix which is zero except for its
1000
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1001
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1002
+ * are the singular values of A; they are real and non-negative, and
1003
+ * are returned in descending order. The first min(m,n) columns of
1004
+ * U and V are the left and right singular vectors of A.
1005
+ *
1006
+ * Note that the routine returns V**T, not V.
1007
+ */
1008
+ /*
1009
+ static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1010
+ static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
1011
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
1012
+ nm::math::lapack_gesvd<float,float>,
1013
+ nm::math::lapack_gesvd<double,double>,
1014
+ nm::math::lapack_gesvd<nm::Complex64,float>,
1015
+ nm::math::lapack_gesvd<nm::Complex128,double>,
1016
+ NULL, NULL, NULL, NULL // no rationals or Ruby objects
1017
+ };
1018
+
1019
+ nm::dtype_t dtype = NM_DTYPE(a);
1020
+
1021
+
1022
+ if (!gesvd_table[dtype]) {
1023
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1024
+ return Qfalse;
1025
+ } else {
1026
+ int M = FIX2INT(m),
1027
+ N = FIX2INT(n);
1028
+
1029
+ int min_mn = NM_MIN(M,N);
1030
+ int max_mn = NM_MAX(M,N);
1031
+
1032
+ char JOBU = lapack_svd_job_sym(jobu),
1033
+ JOBVT = lapack_svd_job_sym(jobvt);
1034
+
1035
+ // only need rwork for complex matrices
1036
+ int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
1037
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
1038
+ int work_size = FIX2INT(lwork);
1039
+
1040
+ // ignore user argument for lwork if it's too small.
1041
+ work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
1042
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1043
+
1044
+ int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1045
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1046
+ work, work_size, rwork);
1047
+ return INT2FIX(info);
1048
+ }
1049
+ }*/
1050
+
1051
+ /*
1052
+ * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
1053
+ *
1054
+ * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
1055
+ * M-by-N matrix A, optionally computing the left and/or right singular
1056
+ * vectors. The SVD is written
1057
+ *
1058
+ * A = U * SIGMA * transpose(V)
1059
+ *
1060
+ * where SIGMA is an M-by-N matrix which is zero except for its
1061
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
1062
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
1063
+ * are the singular values of A; they are real and non-negative, and
1064
+ * are returned in descending order. The first min(m,n) columns of
1065
+ * U and V are the left and right singular vectors of A.
1066
+ *
1067
+ * Note that the routine returns V**T, not V.
1068
+ */
1069
+ /*
1070
+ static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
1071
+ static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
1072
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
1073
+ nm::math::lapack_gesdd<float,float>,
1074
+ nm::math::lapack_gesdd<double,double>,
1075
+ nm::math::lapack_gesdd<nm::Complex64,float>,
1076
+ nm::math::lapack_gesdd<nm::Complex128,double>,
1077
+ NULL, NULL, NULL, NULL // no rationals or Ruby objects
1078
+ };
1079
+
1080
+ nm::dtype_t dtype = NM_DTYPE(a);
1081
+
1082
+ if (!gesdd_table[dtype]) {
1083
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1084
+ return Qfalse;
1085
+ } else {
1086
+ int M = FIX2INT(m),
1087
+ N = FIX2INT(n);
1088
+
1089
+ int min_mn = NM_MIN(M,N);
1090
+ int max_mn = NM_MAX(M,N);
1091
+
1092
+ char JOBZ = lapack_svd_job_sym(jobz);
1093
+
1094
+ // only need rwork for complex matrices
1095
+ void* rwork = NULL;
1096
+
1097
+ int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
1098
+ if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
1099
+ int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
1100
+ rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
1101
+
1102
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
1103
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
1104
+ else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
1105
+ } else {
1106
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
1107
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
1108
+ else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
1109
+ }
1110
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
1111
+ int* iwork = NM_ALLOCA_N(int, 8*min_mn);
1112
+
1113
+ int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1114
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
1115
+ work, work_size, iwork, rwork);
1116
+ return INT2FIX(info);
1117
+ }
1118
+ }*/
1119
+
1120
+
1121
+ /*
1122
+ * Function signature conversion for calling CBLAS' geev functions as directly as possible.
1123
+ *
1124
+ * GEEV computes for an N-by-N real nonsymmetric matrix A, the
1125
+ * eigenvalues and, optionally, the left and/or right eigenvectors.
1126
+ *
1127
+ * The right eigenvector v(j) of A satisfies
1128
+ * A * v(j) = lambda(j) * v(j)
1129
+ * where lambda(j) is its eigenvalue.
1130
+ *
1131
+ * The left eigenvector u(j) of A satisfies
1132
+ * u(j)**H * A = lambda(j) * u(j)**H
1133
+ * where u(j)**H denotes the conjugate transpose of u(j).
1134
+ *
1135
+ * The computed eigenvectors are normalized to have Euclidean norm
1136
+ * equal to 1 and largest component real.
1137
+ */
1138
+ /*
1139
+ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
1140
+ static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
1141
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
1142
+ nm::math::lapack_geev<float,float>,
1143
+ nm::math::lapack_geev<double,double>,
1144
+ nm::math::lapack_geev<nm::Complex64,float>,
1145
+ nm::math::lapack_geev<nm::Complex128,double>,
1146
+ NULL, NULL, NULL, NULL // no rationals or Ruby objects
1147
+ };
1148
+
1149
+ nm::dtype_t dtype = NM_DTYPE(a);
1150
+
1151
+
1152
+ if (!geev_table[dtype]) {
1153
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1154
+ return Qfalse;
1155
+ } else {
1156
+ int N = FIX2INT(n);
1157
+
1158
+ char JOBVL = lapack_evd_job_sym(compute_left),
1159
+ JOBVR = lapack_evd_job_sym(compute_right);
1160
+
1161
+ void* A = NM_STORAGE_DENSE(a)->elements;
1162
+ void* WR = NM_STORAGE_DENSE(w)->elements;
1163
+ void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
1164
+ void* VL = NM_STORAGE_DENSE(vl)->elements;
1165
+ void* VR = NM_STORAGE_DENSE(vr)->elements;
1166
+
1167
+ // only need rwork for complex matrices (wi == Qnil for complex)
1168
+ int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
1169
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
1170
+ int work_size = FIX2INT(lwork);
1171
+ void* work;
1172
+
1173
+ int info;
1174
+
1175
+ // if work size is 0 or -1, query.
1176
+ if (work_size <= 0) {
1177
+ work_size = -1;
1178
+ work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
1179
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1180
+ work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
1181
+ // line above is basically: work_size = (int)(work[0]); // now have new work_size
1182
+ NM_FREE(work);
1183
+ if (info == 0)
1184
+ rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
1185
+ else return INT2FIX(info); // error of some kind on query!
1186
+ }
1187
+
1188
+ // if work size is < 2*N, just set it to 2*N
1189
+ if (work_size < 2*N) work_size = 2*N;
1190
+ if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
1191
+ work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
1192
+ }
1193
+
1194
+ // Allocate work array for actual run
1195
+ work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
1196
+
1197
+ // Perform the actual calculation.
1198
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
1199
+
1200
+ return INT2FIX(info);
1201
+ }
1202
+ }*/
1203
+
1204
+
1205
+ /*
1206
+ * Based on LAPACK's dscal function, but for any dtype.
1207
+ *
1208
+ * In-place modification; returns the modified vector as well.
1209
+ */
1210
+ /*
1211
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
1212
+ nm::dtype_t dtype = NM_DTYPE(vector);
1213
+
1214
+ void* da = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1215
+ rubyval_to_cval(scale, dtype, da);
1216
+
1217
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
1218
+
1219
+ ttable[dtype](FIX2INT(n), da, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));
1220
+
1221
+ return vector;
1222
+ }
1223
+
1224
+
1225
+ static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1226
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {*/
1227
+ /*nm::math::clapack_lauum<uint8_t, false>,
1228
+ nm::math::clapack_lauum<int8_t, false>,
1229
+ nm::math::clapack_lauum<int16_t, false>,
1230
+ nm::math::clapack_lauum<uint32_t, false>,
1231
+ nm::math::clapack_lauum<uint64_t, false>,*/
1232
+ /*NULL, NULL, NULL, NULL, NULL,
1233
+ nm::math::clapack_lauum<false, float>,
1234
+ nm::math::clapack_lauum<false, double>,
1235
+ #ifdef HAVE_CLAPACK_H
1236
+ clapack_clauum, clapack_zlauum, // call directly, same function signature!
1237
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1238
+ nm::math::clapack_lauum<true, nm::Complex64>,
1239
+ nm::math::clapack_lauum<true, nm::Complex128>,
1240
+ #endif*/
1241
+ /*
1242
+ nm::math::clapack_lauum<nm::Rational32, false>,
1243
+ nm::math::clapack_lauum<nm::Rational64, false>,
1244
+ nm::math::clapack_lauum<nm::Rational128, false>,
1245
+ nm::math::clapack_lauum<nm::RubyObject, false>
1246
+
1247
+ */
1248
+ /*};
1249
+
1250
+ if (!ttable[NM_DTYPE(a)]) {
1251
+ rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
1252
+ } else {
1253
+ // Call either our version of lauum or the LAPACK version.
1254
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1255
+ }
1256
+
1257
+ return a;
1258
+ }*/
1259
+
1260
+
1261
+ /* Call any of the clapack_xgetrf functions as directly as possible.
1262
+ *
1263
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
1264
+ * matrix A using partial pivoting with row interchanges.
1265
+ *
1266
+ * The factorization has the form:
1267
+ * A = P * L * U
1268
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
1269
+ * and U is upper triangular (upper trapezoidal if m < n).
1270
+ *
1271
+ * This is the right-looking level 3 BLAS version of the algorithm.
1272
+ *
1273
+ * == Arguments
1274
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
1275
+ * (You don't need argument 5; this is the value returned by this function.)
1276
+ *
1277
+ * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
1278
+ * with its arguments?
1279
+ *
1280
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1281
+ * handling, so you can easily crash Ruby!
1282
+ *
1283
+ * Returns an array giving the pivot indices (normally these are argument #5).
1284
+ */
1285
+ /*
1286
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
1287
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
1288
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1289
+ nm::math::clapack_getrf<float>,
1290
+ nm::math::clapack_getrf<double>,
1291
+ #ifdef HAVE_CLAPACK_H
1292
+ clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
1293
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1294
+ nm::math::clapack_getrf<nm::Complex64>,
1295
+ nm::math::clapack_getrf<nm::Complex128>,
1296
+ #endif
1297
+ nm::math::clapack_getrf<nm::Rational32>,
1298
+ nm::math::clapack_getrf<nm::Rational64>,
1299
+ nm::math::clapack_getrf<nm::Rational128>,
1300
+ nm::math::clapack_getrf<nm::RubyObject>
1301
+ };
1302
+
1303
+ int M = FIX2INT(m),
1304
+ N = FIX2INT(n);
1305
+
1306
+ // Allocate the pivot index array, which is of size MIN(M, N).
1307
+ size_t ipiv_size = std::min(M,N);
1308
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
1309
+
1310
+ if (!ttable[NM_DTYPE(a)]) {
1311
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1312
+ } else {
1313
+ // Call either our version of getrf or the LAPACK version.
1314
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
1315
+ }
1316
+
1317
+ // Result will be stored in a. We return ipiv as an array.
1318
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
1319
+ for (size_t i = 0; i < ipiv_size; ++i) {
1320
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
1321
+ }
1322
+
1323
+ return ipiv_array;
1324
+ }*/
1325
+
1326
+
1327
+ /* Call any of the clapack_xpotrf functions as directly as possible.
1328
+ *
1329
+ * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
1330
+ * with its arguments?
1331
+ *
1332
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1333
+ * handling, so you can easily crash Ruby!
1334
+ *
1335
+ * Returns an array giving the pivot indices (normally these are argument #5).
1336
+ */
1337
+ /*
1338
+ static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1339
+ #ifndef HAVE_CLAPACK_H
1340
+ rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
1341
+ #endif
1342
+
1343
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1344
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1345
+ nm::math::clapack_potrf<float>,
1346
+ nm::math::clapack_potrf<double>,
1347
+ #ifdef HAVE_CLAPACK_H
1348
+ clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
1349
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1350
+ nm::math::clapack_potrf<nm::Complex64>,
1351
+ nm::math::clapack_potrf<nm::Complex128>,
1352
+ #endif
1353
+ NULL, NULL, NULL, NULL*/ /*
1354
+ nm::math::clapack_potrf<nm::Rational32>,
1355
+ nm::math::clapack_potrf<nm::Rational64>,
1356
+ nm::math::clapack_potrf<nm::Rational128>,
1357
+ nm::math::clapack_potrf<nm::RubyObject> */
1358
+ /*};
1359
+
1360
+ if (!ttable[NM_DTYPE(a)]) {
1361
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1362
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1363
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1364
+ } else {
1365
+ // Call either our version of potrf or the LAPACK version.
1366
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1367
+ }
1368
+
1369
+ return a;
1370
+ }*/
1371
+
1372
+
1373
+ /*
1374
+ * Call any of the clapack_xgetrs functions as directly as possible.
1375
+ */
1376
+ /*
1377
+ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
1378
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
1379
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
1380
+ const int ldb) = {
1381
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1382
+ nm::math::clapack_getrs<float>,
1383
+ nm::math::clapack_getrs<double>,
1384
+ #ifdef HAVE_CLAPACK_H
1385
+ clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
1386
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1387
+ nm::math::clapack_getrs<nm::Complex64>,
1388
+ nm::math::clapack_getrs<nm::Complex128>,
1389
+ #endif
1390
+ nm::math::clapack_getrs<nm::Rational32>,
1391
+ nm::math::clapack_getrs<nm::Rational64>,
1392
+ nm::math::clapack_getrs<nm::Rational128>,
1393
+ nm::math::clapack_getrs<nm::RubyObject>
1394
+ };
1395
+
1396
+ // Allocate the C version of the pivot index array
1397
+ // TODO: Allow for an NVector here also, maybe?
1398
+ int* ipiv_;
1399
+ if (TYPE(ipiv) != T_ARRAY) {
1400
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1401
+ } else {
1402
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1403
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1404
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1405
+ }
1406
+ }
1407
+
1408
+ if (!ttable[NM_DTYPE(a)]) {
1409
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1410
+ } else {
1411
+
1412
+ // Call either our version of getrs or the LAPACK version.
1413
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1414
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1415
+ }
1416
+
1417
+ // b is both returned and modified directly in the argument list.
1418
+ return b;
1419
+ }*/
1420
+
1421
+
1422
+ /*
1423
+ * Call any of the clapack_xpotrs functions as directly as possible.
1424
+ */
1425
+ /*
1426
+ static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
1427
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1428
+ const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
1429
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1430
+ nm::math::clapack_potrs<float,false>,
1431
+ nm::math::clapack_potrs<double,false>,
1432
+ #ifdef HAVE_CLAPACK_H
1433
+ clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
1434
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1435
+ nm::math::clapack_potrs<nm::Complex64,true>,
1436
+ nm::math::clapack_potrs<nm::Complex128,true>,
1437
+ #endif
1438
+ nm::math::clapack_potrs<nm::Rational32,false>,
1439
+ nm::math::clapack_potrs<nm::Rational64,false>,
1440
+ nm::math::clapack_potrs<nm::Rational128,false>,
1441
+ nm::math::clapack_potrs<nm::RubyObject,false>
1442
+ };
1443
+
1444
+
1445
+ if (!ttable[NM_DTYPE(a)]) {
1446
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1447
+ } else {
1448
+
1449
+ // Call either our version of potrs or the LAPACK version.
1450
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1451
+ NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1452
+ }
1453
+
1454
+ // b is both returned and modified directly in the argument list.
1455
+ return b;
1456
+ }*/
1457
+
1458
+
1459
+ /* Call any of the clapack_xgetri functions as directly as possible.
1460
+ *
1461
+ * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
1462
+ * with its arguments?
1463
+ *
1464
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1465
+ * handling, so you can easily crash Ruby!
1466
+ *
1467
+ * Returns an array giving the pivot indices (normally these are argument #5).
1468
+ */
1469
+ /*
1470
+ static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
1471
+ #ifndef HAVE_CLAPACK_H
1472
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1473
+ #endif
1474
+
1475
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
1476
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1477
+ nm::math::clapack_getri<float>,
1478
+ nm::math::clapack_getri<double>,
1479
+ #ifdef HAVE_CLAPACK_H
1480
+ clapack_cgetri, clapack_zgetri, // call directly, same function signature!
1481
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1482
+ nm::math::clapack_getri<nm::Complex64>,
1483
+ nm::math::clapack_getri<nm::Complex128>,
1484
+ #endif
1485
+ NULL, NULL, NULL, NULL*/ /*
1486
+ nm::math::clapack_getri<nm::Rational32>,
1487
+ nm::math::clapack_getri<nm::Rational64>,
1488
+ nm::math::clapack_getri<nm::Rational128>,
1489
+ nm::math::clapack_getri<nm::RubyObject> */
1490
+ /* };
1491
+
1492
+ // Allocate the C version of the pivot index array
1493
+ // TODO: Allow for an NVector here also, maybe?
1494
+ int* ipiv_;
1495
+ if (TYPE(ipiv) != T_ARRAY) {
1496
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1497
+ } else {
1498
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1499
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1500
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1501
+ }
1502
+ }
1503
+
1504
+ if (!ttable[NM_DTYPE(a)]) {
1505
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1506
+ // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
1507
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1508
+ } else {
1509
+ // Call either our version of getri or the LAPACK version.
1510
+ ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
1511
+ }
1512
+
1513
+ return a;
1514
+ }*/
1515
+
1516
+
1517
+ /* Call any of the clapack_xpotri functions as directly as possible.
1518
+ *
1519
+ * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
1520
+ * with its arguments?
1521
+ *
1522
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1523
+ * handling, so you can easily crash Ruby!
1524
+ *
1525
+ * Returns an array giving the pivot indices (normally these are argument #5).
1526
+ */
1527
+ /*
1528
+ static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1529
+ #ifndef HAVE_CLAPACK_H
1530
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1531
+ #endif
1532
+
1533
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1534
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1535
+ nm::math::clapack_potri<float>,
1536
+ nm::math::clapack_potri<double>,
1537
+ #ifdef HAVE_CLAPACK_H
1538
+ clapack_cpotri, clapack_zpotri, // call directly, same function signature!
1539
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1540
+ nm::math::clapack_potri<nm::Complex64>,
1541
+ nm::math::clapack_potri<nm::Complex128>,
1542
+ #endif
1543
+ NULL, NULL, NULL, NULL*/ /*
1544
+ nm::math::clapack_getri<nm::Rational32>,
1545
+ nm::math::clapack_getri<nm::Rational64>,
1546
+ nm::math::clapack_getri<nm::Rational128>,
1547
+ nm::math::clapack_getri<nm::RubyObject> */
1548
+ /*};
1549
+
1550
+ if (!ttable[NM_DTYPE(a)]) {
1551
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1552
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1553
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1554
+ } else {
1555
+ // Call either our version of getri or the LAPACK version.
1556
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1557
+ }
1558
+
1559
+ return a;
1560
+ }*/
1561
+
1562
+
1563
+ /*
1564
+ * Call any of the clapack_xlaswp functions as directly as possible.
1565
+ *
1566
+ * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
1567
+ * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
1568
+ */
1569
+ /*
1570
+ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
1571
+ static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
1572
+ nm::math::clapack_laswp<uint8_t>,
1573
+ nm::math::clapack_laswp<int8_t>,
1574
+ nm::math::clapack_laswp<int16_t>,
1575
+ nm::math::clapack_laswp<int32_t>,
1576
+ nm::math::clapack_laswp<int64_t>,
1577
+ nm::math::clapack_laswp<float>,
1578
+ nm::math::clapack_laswp<double>,
1579
+ //#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
1580
+ // clapack_claswp, clapack_zlaswp, // call directly, same function signature!
1581
+ //#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1582
+ nm::math::clapack_laswp<nm::Complex64>,
1583
+ nm::math::clapack_laswp<nm::Complex128>,
1584
+ //#endif
1585
+ nm::math::clapack_laswp<nm::Rational32>,
1586
+ nm::math::clapack_laswp<nm::Rational64>,
1587
+ nm::math::clapack_laswp<nm::Rational128>,
1588
+ nm::math::clapack_laswp<nm::RubyObject>
1589
+ };
1590
+
1591
+ // Allocate the C version of the pivot index array
1592
+ // TODO: Allow for an NVector here also, maybe?
1593
+ int* ipiv_;
1594
+ if (TYPE(ipiv) != T_ARRAY) {
1595
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1596
+ } else {
1597
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1598
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1599
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1600
+ }
1601
+ }
1602
+
1603
+ // Call either our version of laswp or the LAPACK version.
1604
+ ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
1605
+
1606
+ // a is both returned and modified directly in the argument list.
1607
+ return a;
1608
+ }*/
1609
+
1610
+
1611
+ /*
1612
+ * C accessor for calculating an exact determinant.
1613
+ */
1614
+ /*
1615
+ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) {
1616
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
1617
+
1618
+ ttable[dtype](M, elements, lda, result);
1619
+ }*/
1620
+
1621
+
1622
+ /*
1623
+ * Transpose an array of elements that represent a row-major dense matrix. Does not allocate anything, only does an memcpy.
1624
+ */
1625
+ /*
1626
+ void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
1627
+ for (size_t i = 0; i < N; ++i) {
1628
+ for (size_t j = 0; j < M; ++j) {
1629
+
1630
+ memcpy(reinterpret_cast<char*>(B) + (i*ldb+j)*element_size,
1631
+ reinterpret_cast<const char*>(A) + (j*lda+i)*element_size,
1632
+ element_size);
1633
+
1634
+ }
1635
+ }
1636
+ }*/
1637
+
1638
+
1639
+ } // end of extern "C" block