nmatrix-atlas 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/data/complex.h +364 -0
  3. data/ext/nmatrix/data/data.h +638 -0
  4. data/ext/nmatrix/data/meta.h +64 -0
  5. data/ext/nmatrix/data/ruby_object.h +389 -0
  6. data/ext/nmatrix/math/asum.h +120 -0
  7. data/ext/nmatrix/math/cblas_enums.h +36 -0
  8. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  9. data/ext/nmatrix/math/gemm.h +241 -0
  10. data/ext/nmatrix/math/gemv.h +178 -0
  11. data/ext/nmatrix/math/getrf.h +255 -0
  12. data/ext/nmatrix/math/getrs.h +121 -0
  13. data/ext/nmatrix/math/imax.h +79 -0
  14. data/ext/nmatrix/math/laswp.h +165 -0
  15. data/ext/nmatrix/math/long_dtype.h +49 -0
  16. data/ext/nmatrix/math/math.h +744 -0
  17. data/ext/nmatrix/math/nrm2.h +160 -0
  18. data/ext/nmatrix/math/rot.h +117 -0
  19. data/ext/nmatrix/math/rotg.h +106 -0
  20. data/ext/nmatrix/math/scal.h +71 -0
  21. data/ext/nmatrix/math/trsm.h +332 -0
  22. data/ext/nmatrix/math/util.h +148 -0
  23. data/ext/nmatrix/nm_memory.h +60 -0
  24. data/ext/nmatrix/nmatrix.h +408 -0
  25. data/ext/nmatrix/ruby_constants.h +106 -0
  26. data/ext/nmatrix/storage/common.h +176 -0
  27. data/ext/nmatrix/storage/dense/dense.h +128 -0
  28. data/ext/nmatrix/storage/list/list.h +137 -0
  29. data/ext/nmatrix/storage/storage.h +98 -0
  30. data/ext/nmatrix/storage/yale/class.h +1139 -0
  31. data/ext/nmatrix/storage/yale/iterators/base.h +142 -0
  32. data/ext/nmatrix/storage/yale/iterators/iterator.h +130 -0
  33. data/ext/nmatrix/storage/yale/iterators/row.h +449 -0
  34. data/ext/nmatrix/storage/yale/iterators/row_stored.h +139 -0
  35. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +168 -0
  36. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +123 -0
  37. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  38. data/ext/nmatrix/storage/yale/yale.h +202 -0
  39. data/ext/nmatrix/types.h +54 -0
  40. data/ext/nmatrix/util/io.h +115 -0
  41. data/ext/nmatrix/util/sl_list.h +143 -0
  42. data/ext/nmatrix/util/util.h +78 -0
  43. data/ext/nmatrix_atlas/extconf.rb +250 -0
  44. data/ext/nmatrix_atlas/math_atlas.cpp +1206 -0
  45. data/ext/nmatrix_atlas/math_atlas/cblas_templates_atlas.h +72 -0
  46. data/ext/nmatrix_atlas/math_atlas/clapack_templates.h +332 -0
  47. data/ext/nmatrix_atlas/math_atlas/geev.h +82 -0
  48. data/ext/nmatrix_atlas/math_atlas/gesdd.h +83 -0
  49. data/ext/nmatrix_atlas/math_atlas/gesvd.h +81 -0
  50. data/ext/nmatrix_atlas/math_atlas/inc.h +47 -0
  51. data/ext/nmatrix_atlas/nmatrix_atlas.cpp +44 -0
  52. data/lib/nmatrix/atlas.rb +213 -0
  53. data/lib/nmatrix/lapack_ext_common.rb +69 -0
  54. data/spec/00_nmatrix_spec.rb +730 -0
  55. data/spec/01_enum_spec.rb +190 -0
  56. data/spec/02_slice_spec.rb +389 -0
  57. data/spec/03_nmatrix_monkeys_spec.rb +78 -0
  58. data/spec/2x2_dense_double.mat +0 -0
  59. data/spec/4x4_sparse.mat +0 -0
  60. data/spec/4x5_dense.mat +0 -0
  61. data/spec/blas_spec.rb +193 -0
  62. data/spec/elementwise_spec.rb +303 -0
  63. data/spec/homogeneous_spec.rb +99 -0
  64. data/spec/io/fortran_format_spec.rb +88 -0
  65. data/spec/io/harwell_boeing_spec.rb +98 -0
  66. data/spec/io/test.rua +9 -0
  67. data/spec/io_spec.rb +149 -0
  68. data/spec/lapack_core_spec.rb +482 -0
  69. data/spec/leakcheck.rb +16 -0
  70. data/spec/math_spec.rb +730 -0
  71. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  72. data/spec/nmatrix_yale_spec.rb +286 -0
  73. data/spec/plugins/atlas/atlas_spec.rb +242 -0
  74. data/spec/rspec_monkeys.rb +56 -0
  75. data/spec/rspec_spec.rb +34 -0
  76. data/spec/shortcuts_spec.rb +310 -0
  77. data/spec/slice_set_spec.rb +157 -0
  78. data/spec/spec_helper.rb +140 -0
  79. data/spec/stat_spec.rb +203 -0
  80. data/spec/test.pcd +20 -0
  81. data/spec/utm5940.mtx +83844 -0
  82. metadata +159 -0
@@ -0,0 +1,78 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == util.h
25
+ //
26
+ // Header file for utility functions and data.
27
+
28
+ #ifndef UTIL_H
29
+ #define UTIL_H
30
+
31
+ /*
32
+ * Standard Includes
33
+ */
34
+
35
+ /*
36
+ * Project Includes
37
+ */
38
+
39
+ #include "types.h"
40
+
41
+ /*
42
+ * Macros
43
+ */
44
+
45
+ /*
46
+ * Types
47
+ */
48
+
49
+ /*
50
+ * Data
51
+ */
52
+
53
+ /*
54
+ * Functions
55
+ */
56
namespace nm {
  /*
   * Greatest common factor of x and y, found with Euclid's algorithm.
   *
   * Negative arguments are replaced by their negation first. If either
   * argument is zero the (negated-if-negative) other argument is returned,
   * so gcf(0, 0) yields 0.
   */
  template <typename Type>
  inline Type gcf(Type x, Type y) {
    Type small = x;
    Type large = y;

    if (small < 0) small = -small;
    if (large < 0) large = -large;

    if (small == 0) return large;
    if (large == 0) return small;

    // Replace (small, large) by (large % small, small) until the remainder is 0;
    // the last non-zero value ends up in `large`.
    while (small > 0) {
      const Type remainder = large % small;
      large = small;
      small = remainder;
    }

    return large;
  }
} // end of namespace nm
76
+
77
+
78
+ #endif // UTIL_H
@@ -0,0 +1,250 @@
1
+ # = NMatrix
2
+ #
3
+ # A linear algebra library for scientific computation in Ruby.
4
+ # NMatrix is part of SciRuby.
5
+ #
6
+ # NMatrix was originally inspired by and derived from NArray, by
7
+ # Masahiro Tanaka: http://narray.rubyforge.org
8
+ #
9
+ # == Copyright Information
10
+ #
11
+ # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
12
+ # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
13
+ #
14
+ # Please see LICENSE.txt for additional copyright notices.
15
+ #
16
+ # == Contributing
17
+ #
18
+ # By contributing source code to SciRuby, you agree to be bound by
19
+ # our Contributor Agreement:
20
+ #
21
+ # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
22
+ #
23
+ # == extconf.rb
24
+ #
25
+ # This file checks for ATLAS and other necessary headers, and
26
+ # generates a Makefile for compiling NMatrix.
27
+
28
+ require "mkmf"
29
+
30
+
31
# Function derived from NArray's extconf.rb.
#
# Reports on standard output whether +type+ can be used: a small program that
# includes ruby.h (plus +header+, when given) and declares a variable of that
# type is handed to try_link. On success "-DHAVE_<TYPE>" (type upcased) is
# pushed onto $defs and true is returned; otherwise false is returned.
def have_type(type, header=nil) #:nodoc:
  printf "checking for %s... ", type
  STDOUT.flush

  probe_lines = ["#include <ruby.h>\n"]
  probe_lines << "#include <#{header}>\n" unless header.nil?
  probe_lines << "int main() { return 0; }\n"
  probe_lines << "int t() { #{type} a; return 0; }\n"

  if try_link(probe_lines.join)
    $defs.push(format("-DHAVE_%s", type.upcase))
    print "yes\n"
    true
  else
    print "no\n"
    false
  end
end
61
+
62
# Function derived from NArray's extconf.rb.
#
# Writes the C header +file+. The header is wrapped in an include guard built
# from the file name and contains one #define for every "-D" entry of $defs:
#
#   -DNAME        ->  #define NAME 1
#   -DNAME=VALUE  ->  #define NAME VALUE
#
# Entries of $defs that are not "-D" flags are skipped. "#define RUBY_2 1" is
# added when running on Ruby 2 or later.
def create_conf_h(file) #:nodoc:
  print "creating #{file}\n"
  File.open(file, 'w') do |hfile|
    # Replace *every* character that cannot appear in a macro name (gsub, not
    # sub), so names with several dots, spaces or path separators still give
    # a valid guard.
    header_guard = file.upcase.gsub(/[^A-Z0-9_]/, '_')

    hfile.puts "#ifndef #{header_guard}"
    hfile.puts "#define #{header_guard}"
    hfile.puts

    # FIXME: Find a better way to do this:
    # Compare the major version numerically; a plain string comparison would
    # misorder multi-digit major versions.
    hfile.puts "#define RUBY_2 1" if RUBY_VERSION.to_i >= 2

    $defs.each do |flag|
      match = /\A-D([^=\s]+)(?:=(.*))?\z/.match(flag)
      next if match.nil? # not a -D flag; nothing to define

      hfile.printf "#define %s %s\n", match[1], match[2] || "1"
    end

    hfile.puts
    hfile.puts "#endif"
  end
end
84
+
85
# Refuse to build on anything older than Ruby 1.9.
raise(NotImplementedError, "Sorry, you need at least Ruby 1.9!") if RUBY_VERSION < '1.9'

# Installation of headers / import library is currently disabled:
#$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
#$INSTALLFILES << ['libnmatrix.a', '$(archdir)'] if /cygwin|mingw/ =~ RUBY_PLATFORM

# On cygwin/mingw ask the linker to also emit an import library.
CONFIG["DLDFLAGS"] << " --output-lib libnmatrix.a" if /cygwin|mingw/ =~ RUBY_PLATFORM

$DEBUG = true

#not the right way to add this include directory
nmatrix_flags = "-Wall -Werror=return-type -I$(srcdir)/../nmatrix"
$CFLAGS   = [nmatrix_flags, $CFLAGS].join(" ")
$CXXFLAGS = [nmatrix_flags, $CXXFLAGS].join(" ")
$CPPFLAGS = [nmatrix_flags, $CPPFLAGS].join(" ")

# When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb.
# Why not just autogenerate this list from all .c/.cpp files in directory?
basenames = %w{nmatrix_atlas math_atlas}
$objs = basenames.map { |name| "#{name}.o" }
$srcs = basenames.map { |name| "#{name}.cpp" }

# The compiler is forced to g++ (switch to the commented line to use clang++).
#CONFIG['CXX'] = 'clang++'
CONFIG['CXX'] = 'g++'
112
+
113
# Looks for a versioned g++ binary named g++-4.9 down to g++-4.3 using
# `which`. The first one found is stored in CONFIG['CXX'], its version is
# printed, and the binary name is returned. Returns false when none is found.
def find_newer_gplusplus #:nodoc:
  print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
  9.downto(3) do |minor|
    ver = "4.#{minor}"
    gpp = "g++-#{ver}"
    next if `which #{gpp}`.empty?

    CONFIG['CXX'] = gpp
    puts ver
    return CONFIG['CXX']
  end
  false
end
126
+
127
# Returns the version of the compiler in CONFIG['CXX'] as "major.minor.patch",
# read from the predefined macros __GNUC__, __GNUC_MINOR__ and
# __GNUC_PATCHLEVEL__ that the compiler prints for `-E -dM`.
# Raises if any of the three values cannot be read.
def gplusplus_version
  components = %w[__GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__].map do |macro|
    # Output lines look like "#define <macro> <value>"; keep the third field.
    `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{macro}`.chomp.split(' ')[2]
  end

  if components.any?(&:nil?)
    raise("unable to determine g++ version (match to get version was nil)")
  end

  components.join('.')
end
137
+
138
+
139
# Choose the -std= value for the C++ compiler and store it in $CPP_STANDARD.
if CONFIG['CXX'] == 'clang++'
  $CPP_STANDARD = 'c++11'

else
  # Version strings must be compared component by component as integers.
  # Comparing the raw strings orders "10.2.0" before "4.3.0", which made
  # every g++ with a two-digit major version look too old.
  older_than = lambda do |ver, minimum|
    (ver.split('.').map(&:to_i) <=> minimum.split('.').map(&:to_i)) < 0
  end

  version = gplusplus_version
  if older_than.call(version, '4.3.0') && CONFIG['CXX'] == 'g++' # see if we can find a newer G++, unless it's been overridden by user
    if !find_newer_gplusplus
      raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
    end
    version = gplusplus_version
  end

  # g++ releases before 4.7 only know the provisional name of the standard.
  if older_than.call(version, '4.7.0')
    $CPP_STANDARD = 'c++0x'
  else
    $CPP_STANDARD = 'c++11'
  end
  puts "using C++ standard... #{$CPP_STANDARD}"
  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
end
159
+
160
+ # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
161
+
162
+ # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
163
+ # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
164
+ # for ATLAS. The same for all the others
165
+ #
166
+ #dir_config("clapack", ["/usr/local/atlas/include"], [])
167
+ #
168
+ #
169
+
170
+ # Is g++ having trouble finding your header files?
171
+ # Try this:
172
+ # export C_INCLUDE_PATH=/usr/local/atlas/include
173
+ # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
174
+ # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
175
+
176
# Default header search directories handed to dir_config for each library.
idefaults = {lapack: ["/usr/include/atlas"],
             cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
             atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}

# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we only keep
# the directories that actually exist.
#
# File.directory? is used for the check because Dir.exists? is deprecated and
# no longer available in recent Ruby releases.
existing_dirs = lambda { |dirs| dirs.select { |d| File.directory?(d) } }

ldefaults = {lapack: existing_dirs.call(["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"]),
             cblas: existing_dirs.call(["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"]),
             atlas: existing_dirs.call(["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"])}
186
+
187
$libs += " -lclapack " if have_library("clapack") # Usually only applies for Mac OS X

# For each library that is not found directly, register the default
# include/lib directories (and the matching --with-<lib>-* options).
%w[lapack cblas atlas].each do |lib|
  next if have_library(lib)

  dir_config(lib, idefaults[lib.to_sym], ldefaults[lib.to_sym])
end

# If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
# we try our luck with the default location.
remaining_headers = have_header("atlas/cblas.h") ? %w[atlas/clapack.h] : %w[cblas.h clapack.h]
remaining_headers.each { |header| have_header(header) }


# Although have_func is supposed to take a list as its second argument, I find that it simply
# applies a :to_s to the second arg and doesn't actually check each one. We may want to put
# have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and
# lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. --@mohawkjohn 8/20/14
[
  ["clapack_dgetrf", "atlas/clapack.h"],
  ["clapack_dgetri", "atlas/clapack.h"],
  ["dgesvd_",        "clapack.h"],       # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
  ["cblas_dgemm",    "cblas.h"]
].each { |func, header| have_func(func, header) }

#have_func("rb_scan_args", "ruby.h")

#find_library("lapack", "clapack_dgetrf")
#find_library("cblas", "cblas_dgemm")
#find_library("atlas", "ATL_dgemmNN")
# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
$libs += " -llapack -lcblas -latlas "
#$libs += " -lprofiler "
231
+
232
+
233
# Release build flags (-O3). For a debug build, swap in the commented-out
# -O0 -g variants instead.
$CFLAGS += " -O3" #" -O0 -g "
#$CFLAGS += " -static -O0 -g "
$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD}" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
#$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "

# Strip warning flags that get in the way of compiling this extension.
# CONFIG['warnflags'] is not set by every Ruby build, so only touch it when
# it is present instead of failing on nil.
if CONFIG['warnflags']
  [
    '-Wshorten-64-to-32',             # doesn't work except in Mac-patched gcc (4.2)
    '-Wdeclaration-after-statement',
    '-Wimplicit-function-declaration'
  ].each do |flag|
    CONFIG['warnflags'] = CONFIG['warnflags'].gsub(flag, '')
  end
end

create_conf_h("nmatrix_atlas_config.h")
create_makefile("nmatrix_atlas")

# to clean up object files in subdirectories:
File.open('Makefile', 'a') do |f|
  # No subdirectories hold object files at the moment, so the list is empty.
  clean_objs_paths = %w{ }.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" }
  f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}")
end
@@ -0,0 +1,1206 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math_atlas.cpp
25
+ //
26
+ // Ruby-exposed CBLAS and LAPACK functions that call ATLAS
27
+ // functions.
28
+ //
29
+
30
+ /*
31
+ * Project Includes
32
+ */
33
+
34
+ #include "data/data.h"
35
+
36
+ #include "math_atlas/inc.h"
37
+
38
+ #include "math/util.h"
39
+
40
+ //BLAS
41
+ #include "math_atlas/cblas_templates_atlas.h"
42
+
43
+ //LAPACK
44
+ #include "math/laswp.h"
45
+ #include "math_atlas/clapack_templates.h"
46
+
47
+ #include "math_atlas/gesvd.h"
48
+ #include "math_atlas/gesdd.h"
49
+ #include "math_atlas/geev.h"
50
+
51
+
52
+ /*
53
+ * Forward Declarations
54
+ */
55
+
56
+ extern "C" {
57
+ /* BLAS Level 1. */
58
+ static VALUE nm_atlas_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
59
+ static VALUE nm_atlas_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
60
+ static VALUE nm_atlas_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
61
+ static VALUE nm_atlas_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
62
+ static VALUE nm_atlas_cblas_rotg(VALUE self, VALUE ab);
63
+ static VALUE nm_atlas_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
64
+
65
+ /* BLAS Level 2. */
66
+ static VALUE nm_atlas_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
67
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
68
+
69
+ /* BLAS Level 3. */
70
+ static VALUE nm_atlas_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
71
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
72
+ static VALUE nm_atlas_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
73
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
74
+ static VALUE nm_atlas_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
75
+ VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
76
+ static VALUE nm_atlas_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
77
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
78
+ static VALUE nm_atlas_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
79
+ VALUE lda, VALUE beta, VALUE c, VALUE ldc);
80
+
81
+ /* LAPACK. */
82
+ static VALUE nm_atlas_has_clapack(VALUE self);
83
+ static VALUE nm_atlas_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
84
+ static VALUE nm_atlas_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
85
+ static VALUE nm_atlas_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
86
+ static VALUE nm_atlas_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
87
+ static VALUE nm_atlas_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
88
+ static VALUE nm_atlas_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
89
+ static VALUE nm_atlas_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
90
+
91
+ static VALUE nm_atlas_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
92
+ static VALUE nm_atlas_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
93
+ static VALUE nm_atlas_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
94
+ }
95
+
96
+ ////////////////////
97
+ // Math Functions //
98
+ ////////////////////
99
+
100
+ namespace nm {
101
+ namespace math {
102
+ namespace atlas {
103
+
104
+ /*
105
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
106
+ */
107
+ template <typename DType, typename CType>
108
+ inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
109
+ return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<CType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
110
+ }
111
+
112
+ /*
113
+ * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
114
+ */
115
+ template <typename DType, typename CType>
116
+ inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
117
+ return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<CType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
118
+ }
119
+
120
+
121
+ }
122
+ }
123
+ }
124
+
125
+ extern "C" {
126
+
127
+ ///////////////////
128
+ // Ruby Bindings //
129
+ ///////////////////
130
+
131
+ void nm_math_init_atlas() {
132
+ VALUE cNMatrix_ATLAS = rb_define_module_under(cNMatrix, "ATLAS");
133
+
134
+ rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_atlas_has_clapack, 0);
135
+
136
+ VALUE cNMatrix_ATLAS_LAPACK = rb_define_module_under(cNMatrix_ATLAS, "LAPACK");
137
+
138
+ /* ATLAS-CLAPACK Functions */
139
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getrf", (METHOD)nm_atlas_clapack_getrf, 5);
140
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potrf", (METHOD)nm_atlas_clapack_potrf, 5);
141
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getrs", (METHOD)nm_atlas_clapack_getrs, 9);
142
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potrs", (METHOD)nm_atlas_clapack_potrs, 8);
143
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getri", (METHOD)nm_atlas_clapack_getri, 5);
144
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potri", (METHOD)nm_atlas_clapack_potri, 5);
145
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_laswp", (METHOD)nm_atlas_clapack_laswp, 7);
146
+
147
+ /* Non-ATLAS regular LAPACK Functions called via Fortran interface */
148
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_gesvd", (METHOD)nm_atlas_lapack_gesvd, 12);
149
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_gesdd", (METHOD)nm_atlas_lapack_gesdd, 11);
150
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_geev", (METHOD)nm_atlas_lapack_geev, 12);
151
+
152
+ VALUE cNMatrix_ATLAS_BLAS = rb_define_module_under(cNMatrix_ATLAS, "BLAS");
153
+
154
+ //BLAS Level 1
155
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_scal", (METHOD)nm_atlas_cblas_scal, 4);
156
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_nrm2", (METHOD)nm_atlas_cblas_nrm2, 3);
157
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_asum", (METHOD)nm_atlas_cblas_asum, 3);
158
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_rot", (METHOD)nm_atlas_cblas_rot, 7);
159
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_rotg", (METHOD)nm_atlas_cblas_rotg, 1);
160
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_imax", (METHOD)nm_atlas_cblas_imax, 3);
161
+
162
+ //BLAS Level 2
163
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_gemv", (METHOD)nm_atlas_cblas_gemv, 11);
164
+
165
+ //BLAS Level 3
166
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_gemm", (METHOD)nm_atlas_cblas_gemm, 14);
167
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_trsm", (METHOD)nm_atlas_cblas_trsm, 12);
168
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_trmm", (METHOD)nm_atlas_cblas_trmm, 12);
169
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_syrk", (METHOD)nm_atlas_cblas_syrk, 11);
170
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_herk", (METHOD)nm_atlas_cblas_herk, 11);
171
+
172
+ }
173
+
174
+ /*
175
+ * Simple way to check from within Ruby code if clapack functions are available, without
176
+ * having to wait around for an exception to be thrown.
177
+ */
178
+ static VALUE nm_atlas_has_clapack(VALUE self) {
179
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
180
+ return Qtrue;
181
+ #else
182
+ return Qfalse;
183
+ #endif
184
+ }
185
+
186
+ /*
187
+ * call-seq:
188
+ * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
189
+ *
190
+ * BLAS level 1 function +scal+. Works with all dtypes.
191
+ *
192
+ * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
193
+ * follows:
194
+ * x <- alpha * x
195
+ *
196
+ * - +n+ -> Number of elements of +vector+.
197
+ * - +alpha+ -> Scalar value used in the operation.
198
+ * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
199
+ * - +inc+ -> Increment used in the scaling function. Should generally be 1.
200
+ */
201
+ static VALUE nm_atlas_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
202
+ nm::dtype_t dtype = NM_DTYPE(vector);
203
+
204
+ void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
205
+ rubyval_to_cval(alpha, dtype, scalar);
206
+
207
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_scal, void, const int n,
208
+ const void* scalar, void* x, const int incx);
209
+
210
+ ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
211
+ FIX2INT(incx));
212
+
213
+ return vector;
214
+ }
215
+
216
+ /*
217
+ * Call any of the cblas_xrotg functions as directly as possible.
218
+ *
219
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
220
+ *
221
+ * | c s | | a | | r |
222
+ * | -s c | * | b | = | 0 |
223
+ *
224
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
225
+ *
226
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
227
+ *
228
+ * This function differs from most of the other raw BLAS accessors. Instead of
229
+ * providing a, b, c, s as arguments, you should only provide a and b (the
230
+ * inputs), and you should provide them as the first two elements of any dense
231
+ * NMatrix type.
232
+ *
233
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input
234
+ * NMatrix will also be modified in-place.
235
+ *
236
+ * This function, like the other cblas_ functions, does minimal type-checking.
237
+ */
238
+ static VALUE nm_atlas_cblas_rotg(VALUE self, VALUE ab) {
239
+ static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
240
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
241
+ nm::math::atlas::cblas_rotg<float>,
242
+ nm::math::atlas::cblas_rotg<double>,
243
+ nm::math::atlas::cblas_rotg<nm::Complex64>,
244
+ nm::math::atlas::cblas_rotg<nm::Complex128>,
245
+ NULL //nm::math::atlas::cblas_rotg<nm::RubyObject>
246
+ };
247
+
248
+ nm::dtype_t dtype = NM_DTYPE(ab);
249
+
250
+ if (!ttable[dtype]) {
251
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
252
+ return Qnil;
253
+
254
+ } else {
255
+ NM_CONSERVATIVE(nm_register_value(&self));
256
+ NM_CONSERVATIVE(nm_register_value(&ab));
257
+ void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
258
+ *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
259
+
260
+ // extract A and B from the NVector (first two elements)
261
+ void* pA = NM_STORAGE_DENSE(ab)->elements;
262
+ void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
263
+ // c and s are output
264
+
265
+ ttable[dtype](pA, pB, pC, pS);
266
+
267
+ VALUE result = rb_ary_new2(2);
268
+
269
+ if (dtype == nm::RUBYOBJ) {
270
+ rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
271
+ rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
272
+ } else {
273
+ rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
274
+ rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
275
+ }
276
+ NM_CONSERVATIVE(nm_unregister_value(&ab));
277
+ NM_CONSERVATIVE(nm_unregister_value(&self));
278
+ return result;
279
+ }
280
+ }
281
+
282
+
283
+ /*
284
+ * Call any of the cblas_xrot functions as directly as possible.
285
+ *
286
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
287
+ *
288
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
289
+ * * n :: number of elements to consider in x and y
290
+ * * x :: a vector (expects an NVector)
291
+ * * incx :: stride of x
292
+ * * y :: a vector (expects an NVector)
293
+ * * incy :: stride of y
294
+ * * c :: cosine of the angle of rotation
295
+ * * s :: sine of the angle of rotation
296
+ *
297
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
298
+ * will be float for Complex64 or double for Complex128.
299
+ *
300
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
301
+ * with its arguments?
302
+ *
303
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
304
+ * handling, so you can easily crash Ruby!
305
+ */
306
+ static VALUE nm_atlas_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
307
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
308
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
309
+ nm::math::atlas::cblas_rot<float,float>,
310
+ nm::math::atlas::cblas_rot<double,double>,
311
+ nm::math::atlas::cblas_rot<nm::Complex64,float>,
312
+ nm::math::atlas::cblas_rot<nm::Complex128,double>,
313
+ nm::math::atlas::cblas_rot<nm::RubyObject,nm::RubyObject>
314
+ };
315
+
316
+ nm::dtype_t dtype = NM_DTYPE(x);
317
+
318
+
319
+ if (!ttable[dtype]) {
320
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
321
+ return Qfalse;
322
+ } else {
323
+ void *pC, *pS;
324
+
325
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
326
+ if (dtype == nm::COMPLEX64) {
327
+ pC = NM_ALLOCA_N(float,1);
328
+ pS = NM_ALLOCA_N(float,1);
329
+ rubyval_to_cval(c, nm::FLOAT32, pC);
330
+ rubyval_to_cval(s, nm::FLOAT32, pS);
331
+ } else if (dtype == nm::COMPLEX128) {
332
+ pC = NM_ALLOCA_N(double,1);
333
+ pS = NM_ALLOCA_N(double,1);
334
+ rubyval_to_cval(c, nm::FLOAT64, pC);
335
+ rubyval_to_cval(s, nm::FLOAT64, pS);
336
+ } else {
337
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
338
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
339
+ rubyval_to_cval(c, dtype, pC);
340
+ rubyval_to_cval(s, dtype, pS);
341
+ }
342
+
343
+
344
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
345
+
346
+ return Qtrue;
347
+ }
348
+ }
349
+
350
+
351
+ /*
352
+ * Call any of the cblas_xnrm2 functions as directly as possible.
353
+ *
354
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
355
+ *
356
+ * Arguments:
357
+ * * n :: length of x, must be at least 0
358
+ * * x :: pointer to first entry of input vector
359
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
360
+ *
361
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
362
+ * with its arguments?
363
+ *
364
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
365
+ * handling, so you can easily crash Ruby!
366
+ */
367
+ static VALUE nm_atlas_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
368
+
369
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
370
+ NULL, NULL, NULL, NULL, NULL, // no help for integers
371
+ nm::math::atlas::cblas_nrm2<float32_t,float32_t>,
372
+ nm::math::atlas::cblas_nrm2<float64_t,float64_t>,
373
+ nm::math::atlas::cblas_nrm2<float32_t,nm::Complex64>,
374
+ nm::math::atlas::cblas_nrm2<float64_t,nm::Complex128>,
375
+ nm::math::atlas::cblas_nrm2<nm::RubyObject,nm::RubyObject>
376
+ };
377
+
378
+ nm::dtype_t dtype = NM_DTYPE(x);
379
+
380
+ if (!ttable[dtype]) {
381
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
382
+ return Qnil;
383
+
384
+ } else {
385
+ // Determine the return dtype and allocate it
386
+ nm::dtype_t rdtype = dtype;
387
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
388
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
389
+
390
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
391
+
392
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
393
+
394
+ return rubyobj_from_cval(Result, rdtype).rval;
395
+ }
396
+ }
397
+
398
+
399
+
400
+ /*
401
+ * Call any of the cblas_xasum functions as directly as possible.
402
+ *
403
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
404
+ * of a vector x.
405
+ *
406
+ * Arguments:
407
+ * * n :: length of x, must be at least 0
408
+ * * x :: pointer to first entry of input vector
409
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
410
+ *
411
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
412
+ * with its arguments?
413
+ *
414
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
415
+ * handling, so you can easily crash Ruby!
416
+ */
417
+ static VALUE nm_atlas_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
418
+
419
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
420
+ nm::math::atlas::cblas_asum<uint8_t,uint8_t>,
421
+ nm::math::atlas::cblas_asum<int8_t,int8_t>,
422
+ nm::math::atlas::cblas_asum<int16_t,int16_t>,
423
+ nm::math::atlas::cblas_asum<int32_t,int32_t>,
424
+ nm::math::atlas::cblas_asum<int64_t,int64_t>,
425
+ nm::math::atlas::cblas_asum<float32_t,float32_t>,
426
+ nm::math::atlas::cblas_asum<float64_t,float64_t>,
427
+ nm::math::atlas::cblas_asum<float32_t,nm::Complex64>,
428
+ nm::math::atlas::cblas_asum<float64_t,nm::Complex128>,
429
+ nm::math::atlas::cblas_asum<nm::RubyObject,nm::RubyObject>
430
+ };
431
+
432
+ nm::dtype_t dtype = NM_DTYPE(x);
433
+
434
+ // Determine the return dtype and allocate it
435
+ nm::dtype_t rdtype = dtype;
436
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
437
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
438
+
439
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
440
+
441
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
442
+
443
+ return rubyobj_from_cval(Result, rdtype).rval;
444
+ }
445
+
446
+ /*
447
+ * call-seq:
448
+ * NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
449
+ *
450
+ * BLAS level 1 routine.
451
+ *
452
+ * Return the index of the largest element of +vector+.
453
+ *
454
+ * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
455
+ * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
456
+ * - +inc+ -> It's the increment used when searching. Use 1 except if you know
457
+ * what you're doing.
458
+ */
459
+ static VALUE nm_atlas_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
460
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_imax, int, const int n, const void* x, const int incx);
461
+
462
+ nm::dtype_t dtype = NM_DTYPE(x);
463
+
464
+ int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));
465
+
466
+ // Convert to Ruby's Int value.
467
+ return INT2FIX(index);
468
+ }
469
+
470
+ /* Call any of the cblas_xgemv functions as directly as possible.
471
+ *
472
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
473
+ *
474
+ * y = alpha*op(A)*x + beta*y
475
+ *
476
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
477
+ *
478
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
479
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
480
+ * expose the ultra-optimized ATLAS versions.
481
+ *
482
+ * == Arguments
483
+ * See: http://www.netlib.org/blas/dgemm.f
484
+ *
485
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
486
+ * with its arguments?
487
+ *
488
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
489
+ * handling, so you can easily crash Ruby!
490
+ */
491
+ static VALUE nm_atlas_cblas_gemv(VALUE self,
492
+ VALUE trans_a,
493
+ VALUE m, VALUE n,
494
+ VALUE alpha,
495
+ VALUE a, VALUE lda,
496
+ VALUE x, VALUE incx,
497
+ VALUE beta,
498
+ VALUE y, VALUE incy)
499
+ {
500
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
501
+
502
+ nm::dtype_t dtype = NM_DTYPE(a);
503
+
504
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
505
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
506
+ rubyval_to_cval(alpha, dtype, pAlpha);
507
+ rubyval_to_cval(beta, dtype, pBeta);
508
+
509
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
510
+ }
511
+
512
+ /* Call any of the cblas_xgemm functions as directly as possible.
513
+ *
514
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
515
+ *
516
+ * C = alpha*op(A)*op(B) + beta*C
517
+ *
518
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
519
+ *
520
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
521
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
522
+ * expose the ultra-optimized ATLAS versions.
523
+ *
524
+ * == Arguments
525
+ * See: http://www.netlib.org/blas/dgemm.f
526
+ *
527
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
528
+ * with its arguments?
529
+ *
530
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
531
+ * handling, so you can easily crash Ruby!
532
+ */
533
+ static VALUE nm_atlas_cblas_gemm(VALUE self,
534
+ VALUE order,
535
+ VALUE trans_a, VALUE trans_b,
536
+ VALUE m, VALUE n, VALUE k,
537
+ VALUE alpha,
538
+ VALUE a, VALUE lda,
539
+ VALUE b, VALUE ldb,
540
+ VALUE beta,
541
+ VALUE c, VALUE ldc)
542
+ {
543
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
544
+
545
+ nm::dtype_t dtype = NM_DTYPE(a);
546
+
547
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
548
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
549
+ rubyval_to_cval(alpha, dtype, pAlpha);
550
+ rubyval_to_cval(beta, dtype, pBeta);
551
+
552
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
553
+
554
+ return c;
555
+ }
556
+
557
+
558
+ static VALUE nm_atlas_cblas_trsm(VALUE self,
559
+ VALUE order,
560
+ VALUE side, VALUE uplo,
561
+ VALUE trans_a, VALUE diag,
562
+ VALUE m, VALUE n,
563
+ VALUE alpha,
564
+ VALUE a, VALUE lda,
565
+ VALUE b, VALUE ldb)
566
+ {
567
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
568
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
569
+ const int m, const int n, const void* alpha, const void* a,
570
+ const int lda, void* b, const int ldb) = {
571
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
572
+ nm::math::atlas::cblas_trsm<float>,
573
+ nm::math::atlas::cblas_trsm<double>,
574
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
575
+ nm::math::atlas::cblas_trsm<nm::RubyObject>
576
+ };
577
+
578
+ nm::dtype_t dtype = NM_DTYPE(a);
579
+
580
+ if (!ttable[dtype]) {
581
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
582
+ } else {
583
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
584
+ rubyval_to_cval(alpha, dtype, pAlpha);
585
+
586
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
587
+ }
588
+
589
+ return Qtrue;
590
+ }
591
+
592
+ static VALUE nm_atlas_cblas_trmm(VALUE self,
593
+ VALUE order,
594
+ VALUE side, VALUE uplo,
595
+ VALUE trans_a, VALUE diag,
596
+ VALUE m, VALUE n,
597
+ VALUE alpha,
598
+ VALUE a, VALUE lda,
599
+ VALUE b, VALUE ldb)
600
+ {
601
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
602
+ const enum CBLAS_SIDE, const enum CBLAS_UPLO,
603
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
604
+ const int m, const int n, const void* alpha, const void* a,
605
+ const int lda, void* b, const int ldb) = {
606
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
607
+ nm::math::atlas::cblas_trmm<float>,
608
+ nm::math::atlas::cblas_trmm<double>,
609
+ cblas_ctrmm, cblas_ztrmm, // call directly, same function signature!
610
+ NULL
611
+ };
612
+
613
+ nm::dtype_t dtype = NM_DTYPE(a);
614
+
615
+ if (!ttable[dtype]) {
616
+ rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
617
+ } else {
618
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
619
+ rubyval_to_cval(alpha, dtype, pAlpha);
620
+
621
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
622
+ }
623
+
624
+ return b;
625
+ }
626
+
627
+ static VALUE nm_atlas_cblas_syrk(VALUE self,
628
+ VALUE order,
629
+ VALUE uplo,
630
+ VALUE trans,
631
+ VALUE n, VALUE k,
632
+ VALUE alpha,
633
+ VALUE a, VALUE lda,
634
+ VALUE beta,
635
+ VALUE c, VALUE ldc)
636
+ {
637
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
638
+ const int n, const int k, const void* alpha, const void* a,
639
+ const int lda, const void* beta, void* c, const int ldc) = {
640
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
641
+ nm::math::atlas::cblas_syrk<float>,
642
+ nm::math::atlas::cblas_syrk<double>,
643
+ cblas_csyrk, cblas_zsyrk, // call directly, same function signature!
644
+ NULL
645
+ };
646
+
647
+ nm::dtype_t dtype = NM_DTYPE(a);
648
+
649
+ if (!ttable[dtype]) {
650
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
651
+ } else {
652
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
653
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
654
+ rubyval_to_cval(alpha, dtype, pAlpha);
655
+ rubyval_to_cval(beta, dtype, pBeta);
656
+
657
+ ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
658
+ }
659
+
660
+ return Qtrue;
661
+ }
662
+
663
+ static VALUE nm_atlas_cblas_herk(VALUE self,
664
+ VALUE order,
665
+ VALUE uplo,
666
+ VALUE trans,
667
+ VALUE n, VALUE k,
668
+ VALUE alpha,
669
+ VALUE a, VALUE lda,
670
+ VALUE beta,
671
+ VALUE c, VALUE ldc)
672
+ {
673
+
674
+ nm::dtype_t dtype = NM_DTYPE(a);
675
+
676
+ if (dtype == nm::COMPLEX64) {
677
+ cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
678
+ } else if (dtype == nm::COMPLEX128) {
679
+ cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
680
+ } else
681
+ rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
682
+ return Qtrue;
683
+ }
684
+
685
+ /*
686
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
687
+ *
688
+ * xGESVD computes the singular value decomposition (SVD) of a real
689
+ * M-by-N matrix A, optionally computing the left and/or right singular
690
+ * vectors. The SVD is written
691
+ *
692
+ * A = U * SIGMA * transpose(V)
693
+ *
694
+ * where SIGMA is an M-by-N matrix which is zero except for its
695
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
696
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
697
+ * are the singular values of A; they are real and non-negative, and
698
+ * are returned in descending order. The first min(m,n) columns of
699
+ * U and V are the left and right singular vectors of A.
700
+ *
701
+ * Note that the routine returns V**T, not V.
702
+ */
703
+ static VALUE nm_atlas_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
704
+ static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
705
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
706
+ nm::math::atlas::lapack_gesvd<float,float>,
707
+ nm::math::atlas::lapack_gesvd<double,double>,
708
+ nm::math::atlas::lapack_gesvd<nm::Complex64,float>,
709
+ nm::math::atlas::lapack_gesvd<nm::Complex128,double>,
710
+ NULL // no Ruby objects
711
+ };
712
+
713
+ nm::dtype_t dtype = NM_DTYPE(a);
714
+
715
+
716
+ if (!gesvd_table[dtype]) {
717
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
718
+ return Qfalse;
719
+ } else {
720
+ int M = FIX2INT(m),
721
+ N = FIX2INT(n);
722
+
723
+ int min_mn = NM_MIN(M,N);
724
+ int max_mn = NM_MAX(M,N);
725
+
726
+ char JOBU = lapack_svd_job_sym(jobu),
727
+ JOBVT = lapack_svd_job_sym(jobvt);
728
+
729
+ // only need rwork for complex matrices
730
+ int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
731
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
732
+ int work_size = FIX2INT(lwork);
733
+
734
+ // ignore user argument for lwork if it's too small.
735
+ work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
736
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
737
+
738
+ int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
739
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
740
+ work, work_size, rwork);
741
+ return INT2FIX(info);
742
+ }
743
+ }
744
+
745
+ /*
746
+ * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
747
+ *
748
+ * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
749
+ * M-by-N matrix A, optionally computing the left and/or right singular
750
+ * vectors. The SVD is written
751
+ *
752
+ * A = U * SIGMA * transpose(V)
753
+ *
754
+ * where SIGMA is an M-by-N matrix which is zero except for its
755
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
756
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
757
+ * are the singular values of A; they are real and non-negative, and
758
+ * are returned in descending order. The first min(m,n) columns of
759
+ * U and V are the left and right singular vectors of A.
760
+ *
761
+ * Note that the routine returns V**T, not V.
762
+ */
763
+ static VALUE nm_atlas_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
764
+ static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
765
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
766
+ nm::math::atlas::lapack_gesdd<float,float>,
767
+ nm::math::atlas::lapack_gesdd<double,double>,
768
+ nm::math::atlas::lapack_gesdd<nm::Complex64,float>,
769
+ nm::math::atlas::lapack_gesdd<nm::Complex128,double>,
770
+ NULL // no Ruby objects
771
+ };
772
+
773
+ nm::dtype_t dtype = NM_DTYPE(a);
774
+
775
+ if (!gesdd_table[dtype]) {
776
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
777
+ return Qfalse;
778
+ } else {
779
+ int M = FIX2INT(m),
780
+ N = FIX2INT(n);
781
+
782
+ int min_mn = NM_MIN(M,N);
783
+ int max_mn = NM_MAX(M,N);
784
+
785
+ char JOBZ = lapack_svd_job_sym(jobz);
786
+
787
+ // only need rwork for complex matrices
788
+ void* rwork = NULL;
789
+
790
+ int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
791
+ if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
792
+ int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
793
+ rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
794
+
795
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
796
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
797
+ else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
798
+ } else {
799
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
800
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
801
+ else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
802
+ }
803
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
804
+ int* iwork = NM_ALLOCA_N(int, 8*min_mn);
805
+
806
+ int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
807
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
808
+ work, work_size, iwork, rwork);
809
+ return INT2FIX(info);
810
+ }
811
+ }
812
+
813
+ /*
814
+ * Function signature conversion for calling CBLAS' geev functions as directly as possible.
815
+ *
816
+ * GEEV computes for an N-by-N real nonsymmetric matrix A, the
817
+ * eigenvalues and, optionally, the left and/or right eigenvectors.
818
+ *
819
+ * The right eigenvector v(j) of A satisfies
820
+ * A * v(j) = lambda(j) * v(j)
821
+ * where lambda(j) is its eigenvalue.
822
+ *
823
+ * The left eigenvector u(j) of A satisfies
824
+ * u(j)**H * A = lambda(j) * u(j)**H
825
+ * where u(j)**H denotes the conjugate transpose of u(j).
826
+ *
827
+ * The computed eigenvectors are normalized to have Euclidean norm
828
+ * equal to 1 and largest component real.
829
+ */
830
+ static VALUE nm_atlas_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
831
+ static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
832
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
833
+ nm::math::atlas::lapack_geev<float,float>,
834
+ nm::math::atlas::lapack_geev<double,double>,
835
+ nm::math::atlas::lapack_geev<nm::Complex64,float>,
836
+ nm::math::atlas::lapack_geev<nm::Complex128,double>,
837
+ NULL // no Ruby objects
838
+ };
839
+
840
+ nm::dtype_t dtype = NM_DTYPE(a);
841
+
842
+
843
+ if (!geev_table[dtype]) {
844
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
845
+ return Qfalse;
846
+ } else {
847
+ int N = FIX2INT(n);
848
+
849
+ char JOBVL = lapack_evd_job_sym(compute_left),
850
+ JOBVR = lapack_evd_job_sym(compute_right);
851
+
852
+ void* A = NM_STORAGE_DENSE(a)->elements;
853
+ void* WR = NM_STORAGE_DENSE(w)->elements;
854
+ void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
855
+ void* VL = JOBVL == 'V' ? NM_STORAGE_DENSE(vl)->elements : NULL;
856
+ void* VR = JOBVR == 'V' ? NM_STORAGE_DENSE(vr)->elements : NULL;
857
+
858
+ // only need rwork for complex matrices (wi == Qnil for complex)
859
+ int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
860
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
861
+ int work_size = FIX2INT(lwork);
862
+ void* work;
863
+
864
+ int info;
865
+
866
+ // if work size is 0 or -1, query.
867
+ if (work_size <= 0) {
868
+ work_size = -1;
869
+ work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
870
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
871
+ work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
872
+ // line above is basically: work_size = (int)(work[0]); // now have new work_size
873
+ NM_FREE(work);
874
+ if (info == 0)
875
+ rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
876
+ else return INT2FIX(info); // error of some kind on query!
877
+ }
878
+
879
+ // if work size is < 2*N, just set it to 2*N
880
+ if (work_size < 2*N) work_size = 2*N;
881
+ if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
882
+ work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
883
+ }
884
+
885
+ // Allocate work array for actual run
886
+ work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
887
+
888
+ // Perform the actual calculation.
889
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
890
+
891
+ return INT2FIX(info);
892
+ }
893
+ }
894
+
895
+ /* Call any of the clapack_xgetrf functions as directly as possible.
896
+ *
897
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
898
+ * matrix A using partial pivoting with row interchanges.
899
+ *
900
+ * The factorization has the form:
901
+ * A = P * L * U
902
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
903
+ * and U is upper triangular (upper trapezoidal if m < n).
904
+ *
905
+ * This is the right-looking level 3 BLAS version of the algorithm.
906
+ *
907
+ * == Arguments
908
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
909
+ * (You don't need argument 5; this is the value returned by this function.)
910
+ *
911
+ * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
912
+ * with its arguments?
913
+ *
914
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
915
+ * handling, so you can easily crash Ruby!
916
+ *
917
+ * Returns an array giving the pivot indices (normally these are argument #5).
918
+ */
919
+ static VALUE nm_atlas_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
920
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
921
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
922
+ nm::math::atlas::clapack_getrf<float>,
923
+ nm::math::atlas::clapack_getrf<double>,
924
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
925
+ clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
926
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
927
+ nm::math::atlas::clapack_getrf<nm::Complex64>,
928
+ nm::math::atlas::clapack_getrf<nm::Complex128>,
929
+ #endif
930
+ nm::math::atlas::clapack_getrf<nm::RubyObject>
931
+ };
932
+
933
+ int M = FIX2INT(m),
934
+ N = FIX2INT(n);
935
+
936
+ // Allocate the pivot index array, which is of size MIN(M, N).
937
+ size_t ipiv_size = std::min(M,N);
938
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
939
+
940
+ if (!ttable[NM_DTYPE(a)]) {
941
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
942
+ } else {
943
+ // Call either our version of getrf or the LAPACK version.
944
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
945
+ }
946
+
947
+ // Result will be stored in a. We return ipiv as an array.
948
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
949
+ for (size_t i = 0; i < ipiv_size; ++i) {
950
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
951
+ }
952
+
953
+ return ipiv_array;
954
+ }
955
+
956
+
957
+ /* Call any of the clapack_xpotrf functions as directly as possible.
958
+ *
959
+ * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
960
+ * with its arguments?
961
+ *
962
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
963
+ * handling, so you can easily crash Ruby!
964
+ */
965
+ static VALUE nm_atlas_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
966
+ #if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
967
+ rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
968
+ #endif
969
+
970
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
971
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
972
+ nm::math::atlas::clapack_potrf<float>,
973
+ nm::math::atlas::clapack_potrf<double>,
974
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
975
+ clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
976
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
977
+ nm::math::atlas::clapack_potrf<nm::Complex64>,
978
+ nm::math::atlas::clapack_potrf<nm::Complex128>,
979
+ #endif
980
+ NULL
981
+ };
982
+
983
+ if (!ttable[NM_DTYPE(a)]) {
984
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
985
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
986
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
987
+ } else {
988
+ // Call either our version of potrf or the LAPACK version.
989
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
990
+ }
991
+
992
+ return a;
993
+ }
994
+
995
+
996
+ /*
997
+ * Call any of the clapack_xgetrs functions as directly as possible.
998
+ */
999
+ static VALUE nm_atlas_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
1000
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
1001
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
1002
+ const int ldb) = {
1003
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1004
+ nm::math::atlas::clapack_getrs<float>,
1005
+ nm::math::atlas::clapack_getrs<double>,
1006
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1007
+ clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
1008
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1009
+ nm::math::atlas::clapack_getrs<nm::Complex64>,
1010
+ nm::math::atlas::clapack_getrs<nm::Complex128>,
1011
+ #endif
1012
+ nm::math::atlas::clapack_getrs<nm::RubyObject>
1013
+ };
1014
+
1015
+ // Allocate the C version of the pivot index array
1016
+ int* ipiv_;
1017
+ if (TYPE(ipiv) != T_ARRAY) {
1018
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1019
+ } else {
1020
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1021
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1022
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1023
+ }
1024
+ }
1025
+
1026
+ if (!ttable[NM_DTYPE(a)]) {
1027
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1028
+ } else {
1029
+
1030
+ // Call either our version of getrs or the LAPACK version.
1031
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1032
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1033
+ }
1034
+
1035
+ // b is both returned and modified directly in the argument list.
1036
+ return b;
1037
+ }
1038
+
1039
+
1040
+ /*
1041
+ * Call any of the clapack_xpotrs functions as directly as possible.
1042
+ */
1043
+ static VALUE nm_atlas_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
1044
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1045
+ const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
1046
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1047
+ nm::math::atlas::clapack_potrs<float>,
1048
+ nm::math::atlas::clapack_potrs<double>,
1049
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1050
+ clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
1051
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1052
+ nm::math::atlas::clapack_potrs<nm::Complex64>,
1053
+ nm::math::atlas::clapack_potrs<nm::Complex128>,
1054
+ #endif
1055
+ nm::math::atlas::clapack_potrs<nm::RubyObject>
1056
+ };
1057
+
1058
+
1059
+ if (!ttable[NM_DTYPE(a)]) {
1060
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1061
+ } else {
1062
+
1063
+ // Call either our version of potrs or the LAPACK version.
1064
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1065
+ NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1066
+ }
1067
+
1068
+ // b is both returned and modified directly in the argument list.
1069
+ return b;
1070
+ }
1071
+
1072
+ /* Call any of the clapack_xgetri functions as directly as possible.
1073
+ *
1074
+ * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
1075
+ * with its arguments?
1076
+ *
1077
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1078
+ * handling, so you can easily crash Ruby!
1079
+ *
1080
+ * Returns an array giving the pivot indices (normally these are argument #5).
1081
+ */
1082
+ static VALUE nm_atlas_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
1083
+ #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1084
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1085
+ #endif
1086
+
1087
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
1088
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1089
+ nm::math::atlas::clapack_getri<float>,
1090
+ nm::math::atlas::clapack_getri<double>,
1091
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1092
+ clapack_cgetri, clapack_zgetri, // call directly, same function signature!
1093
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1094
+ nm::math::atlas::clapack_getri<nm::Complex64>,
1095
+ nm::math::atlas::clapack_getri<nm::Complex128>,
1096
+ #endif
1097
+ NULL
1098
+ };
1099
+
1100
+ // Allocate the C version of the pivot index array
1101
+ int* ipiv_;
1102
+ if (TYPE(ipiv) != T_ARRAY) {
1103
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1104
+ } else {
1105
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1106
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1107
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1108
+ }
1109
+ }
1110
+
1111
+ if (!ttable[NM_DTYPE(a)]) {
1112
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1113
+ // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
1114
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1115
+ } else {
1116
+ // Call either our version of getri or the LAPACK version.
1117
+ ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
1118
+ }
1119
+
1120
+ return a;
1121
+ }
1122
+
1123
+
1124
+ /* Call any of the clapack_xpotri functions as directly as possible.
1125
+ *
1126
+ * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
1127
+ * with its arguments?
1128
+ *
1129
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1130
+ * handling, so you can easily crash Ruby!
1131
+ */
1132
+ static VALUE nm_atlas_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1133
+ #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1134
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1135
+ #endif
1136
+
1137
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1138
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1139
+ nm::math::atlas::clapack_potri<float>,
1140
+ nm::math::atlas::clapack_potri<double>,
1141
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1142
+ clapack_cpotri, clapack_zpotri, // call directly, same function signature!
1143
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1144
+ nm::math::atlas::clapack_potri<nm::Complex64>,
1145
+ nm::math::atlas::clapack_potri<nm::Complex128>,
1146
+ #endif
1147
+ NULL
1148
+ };
1149
+
1150
+ if (!ttable[NM_DTYPE(a)]) {
1151
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1152
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1153
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1154
+ } else {
1155
+ // Call either our version of getri or the LAPACK version.
1156
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1157
+ }
1158
+
1159
+ return a;
1160
+ }
1161
+
1162
+
1163
+ /*
1164
+ * Call any of the clapack_xlaswp functions as directly as possible.
1165
+ *
1166
+ * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
1167
+ * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
1168
+ */
1169
+ static VALUE nm_atlas_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
1170
+ //We have actually never used the ATLAS version of laswp. For the time being
1171
+ //I will leave it like that and just always call the internal implementation.
1172
+ //I don't know if there is a good reason for this or not.
1173
+ //Maybe because our internal version swaps columns instead of rows.
1174
+ static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
1175
+ nm::math::clapack_laswp<uint8_t>,
1176
+ nm::math::clapack_laswp<int8_t>,
1177
+ nm::math::clapack_laswp<int16_t>,
1178
+ nm::math::clapack_laswp<int32_t>,
1179
+ nm::math::clapack_laswp<int64_t>,
1180
+ nm::math::clapack_laswp<float>,
1181
+ nm::math::clapack_laswp<double>,
1182
+ nm::math::clapack_laswp<nm::Complex64>,
1183
+ nm::math::clapack_laswp<nm::Complex128>,
1184
+ nm::math::clapack_laswp<nm::RubyObject>
1185
+ };
1186
+
1187
+ // Allocate the C version of the pivot index array
1188
+ int* ipiv_;
1189
+ if (TYPE(ipiv) != T_ARRAY) {
1190
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1191
+ } else {
1192
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1193
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1194
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1195
+ }
1196
+ }
1197
+
1198
+ // Call either our version of laswp or the LAPACK version.
1199
+ ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
1200
+
1201
+ // a is both returned and modified directly in the argument list.
1202
+ return a;
1203
+ }
1204
+
1205
+
1206
+ }