nmatrix-atlas 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/data/complex.h +364 -0
  3. data/ext/nmatrix/data/data.h +638 -0
  4. data/ext/nmatrix/data/meta.h +64 -0
  5. data/ext/nmatrix/data/ruby_object.h +389 -0
  6. data/ext/nmatrix/math/asum.h +120 -0
  7. data/ext/nmatrix/math/cblas_enums.h +36 -0
  8. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  9. data/ext/nmatrix/math/gemm.h +241 -0
  10. data/ext/nmatrix/math/gemv.h +178 -0
  11. data/ext/nmatrix/math/getrf.h +255 -0
  12. data/ext/nmatrix/math/getrs.h +121 -0
  13. data/ext/nmatrix/math/imax.h +79 -0
  14. data/ext/nmatrix/math/laswp.h +165 -0
  15. data/ext/nmatrix/math/long_dtype.h +49 -0
  16. data/ext/nmatrix/math/math.h +744 -0
  17. data/ext/nmatrix/math/nrm2.h +160 -0
  18. data/ext/nmatrix/math/rot.h +117 -0
  19. data/ext/nmatrix/math/rotg.h +106 -0
  20. data/ext/nmatrix/math/scal.h +71 -0
  21. data/ext/nmatrix/math/trsm.h +332 -0
  22. data/ext/nmatrix/math/util.h +148 -0
  23. data/ext/nmatrix/nm_memory.h +60 -0
  24. data/ext/nmatrix/nmatrix.h +408 -0
  25. data/ext/nmatrix/ruby_constants.h +106 -0
  26. data/ext/nmatrix/storage/common.h +176 -0
  27. data/ext/nmatrix/storage/dense/dense.h +128 -0
  28. data/ext/nmatrix/storage/list/list.h +137 -0
  29. data/ext/nmatrix/storage/storage.h +98 -0
  30. data/ext/nmatrix/storage/yale/class.h +1139 -0
  31. data/ext/nmatrix/storage/yale/iterators/base.h +142 -0
  32. data/ext/nmatrix/storage/yale/iterators/iterator.h +130 -0
  33. data/ext/nmatrix/storage/yale/iterators/row.h +449 -0
  34. data/ext/nmatrix/storage/yale/iterators/row_stored.h +139 -0
  35. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +168 -0
  36. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +123 -0
  37. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  38. data/ext/nmatrix/storage/yale/yale.h +202 -0
  39. data/ext/nmatrix/types.h +54 -0
  40. data/ext/nmatrix/util/io.h +115 -0
  41. data/ext/nmatrix/util/sl_list.h +143 -0
  42. data/ext/nmatrix/util/util.h +78 -0
  43. data/ext/nmatrix_atlas/extconf.rb +250 -0
  44. data/ext/nmatrix_atlas/math_atlas.cpp +1206 -0
  45. data/ext/nmatrix_atlas/math_atlas/cblas_templates_atlas.h +72 -0
  46. data/ext/nmatrix_atlas/math_atlas/clapack_templates.h +332 -0
  47. data/ext/nmatrix_atlas/math_atlas/geev.h +82 -0
  48. data/ext/nmatrix_atlas/math_atlas/gesdd.h +83 -0
  49. data/ext/nmatrix_atlas/math_atlas/gesvd.h +81 -0
  50. data/ext/nmatrix_atlas/math_atlas/inc.h +47 -0
  51. data/ext/nmatrix_atlas/nmatrix_atlas.cpp +44 -0
  52. data/lib/nmatrix/atlas.rb +213 -0
  53. data/lib/nmatrix/lapack_ext_common.rb +69 -0
  54. data/spec/00_nmatrix_spec.rb +730 -0
  55. data/spec/01_enum_spec.rb +190 -0
  56. data/spec/02_slice_spec.rb +389 -0
  57. data/spec/03_nmatrix_monkeys_spec.rb +78 -0
  58. data/spec/2x2_dense_double.mat +0 -0
  59. data/spec/4x4_sparse.mat +0 -0
  60. data/spec/4x5_dense.mat +0 -0
  61. data/spec/blas_spec.rb +193 -0
  62. data/spec/elementwise_spec.rb +303 -0
  63. data/spec/homogeneous_spec.rb +99 -0
  64. data/spec/io/fortran_format_spec.rb +88 -0
  65. data/spec/io/harwell_boeing_spec.rb +98 -0
  66. data/spec/io/test.rua +9 -0
  67. data/spec/io_spec.rb +149 -0
  68. data/spec/lapack_core_spec.rb +482 -0
  69. data/spec/leakcheck.rb +16 -0
  70. data/spec/math_spec.rb +730 -0
  71. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  72. data/spec/nmatrix_yale_spec.rb +286 -0
  73. data/spec/plugins/atlas/atlas_spec.rb +242 -0
  74. data/spec/rspec_monkeys.rb +56 -0
  75. data/spec/rspec_spec.rb +34 -0
  76. data/spec/shortcuts_spec.rb +310 -0
  77. data/spec/slice_set_spec.rb +157 -0
  78. data/spec/spec_helper.rb +140 -0
  79. data/spec/stat_spec.rb +203 -0
  80. data/spec/test.pcd +20 -0
  81. data/spec/utm5940.mtx +83844 -0
  82. metadata +159 -0
@@ -0,0 +1,78 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == util.h
25
+ //
26
+ // Header file for utility functions and data.
27
+
28
+ #ifndef UTIL_H
29
+ #define UTIL_H
30
+
31
+ /*
32
+ * Standard Includes
33
+ */
34
+
35
+ /*
36
+ * Project Includes
37
+ */
38
+
39
+ #include "types.h"
40
+
41
+ /*
42
+ * Macros
43
+ */
44
+
45
+ /*
46
+ * Types
47
+ */
48
+
49
+ /*
50
+ * Data
51
+ */
52
+
53
+ /*
54
+ * Functions
55
+ */
namespace nm {
  /*
   * Greatest common factor of x and y, found with the Euclidean algorithm.
   *
   * Negative arguments are negated before the computation. If either
   * argument is zero, the other (after negation) is returned, so
   * gcf(0, 0) yields 0.
   */
  template <typename Type>
  inline Type gcf(Type x, Type y) {
    if (x < 0) x = -x;
    if (y < 0) y = -y;

    if (x == 0) return y;
    if (y == 0) return x;

    // Each pass replaces the pair (x, y) with (y mod x, x); the loop stops
    // once the remainder reaches zero, leaving the factor in y.
    while (x > 0) {
      const Type remainder = y % x;
      y = x;
      x = remainder;
    }

    return y;
  }
} // end of namespace nm
76
+
77
+
78
+ #endif // UTIL_H
@@ -0,0 +1,250 @@
1
+ # = NMatrix
2
+ #
3
+ # A linear algebra library for scientific computation in Ruby.
4
+ # NMatrix is part of SciRuby.
5
+ #
6
+ # NMatrix was originally inspired by and derived from NArray, by
7
+ # Masahiro Tanaka: http://narray.rubyforge.org
8
+ #
9
+ # == Copyright Information
10
+ #
11
+ # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
12
+ # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
13
+ #
14
+ # Please see LICENSE.txt for additional copyright notices.
15
+ #
16
+ # == Contributing
17
+ #
18
+ # By contributing source code to SciRuby, you agree to be bound by
19
+ # our Contributor Agreement:
20
+ #
21
+ # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
22
+ #
23
+ # == extconf.rb
24
+ #
25
+ # This file checks for ATLAS and other necessary headers, and
26
+ # generates a Makefile for compiling NMatrix.
27
+
28
+ require "mkmf"
29
+
30
+
# Function derived from NArray's extconf.rb.
#
# Checks whether the C type +type+ can be declared once <ruby.h> (and,
# optionally, +header+) has been included, by trying to link a tiny program.
# On success "-DHAVE_<TYPE>" (type name upcased) is appended to $defs.
#
# Returns true when the probe program links and false otherwise.
def have_type(type, header=nil) #:nodoc:
  printf "checking for %s... ", type
  STDOUT.flush

  # Assemble the probe program one line at a time.
  lines = ["#include <ruby.h>"]
  lines << "#include <#{header}>" unless header.nil?
  lines << "int main() { return 0; }"
  lines << "int t() { #{type} a; return 0; }"
  program = lines.map { |line| line + "\n" }.join

  if try_link(program)
    $defs.push(format("-DHAVE_%s", type.upcase))
    print "yes\n"
    true
  else
    print "no\n"
    false
  end
end
61
+
# Function derived from NArray's extconf.rb.
#
# Writes the C header +file+ (overwriting any existing file) with one
# "#define" per "-D" flag found in the global $defs, wrapped in an include
# guard derived from the file name.
#
# * "-DNAME"       becomes "#define NAME 1"
# * "-DNAME=VALUE" becomes "#define NAME VALUE"
# * entries of $defs that are not "-D" flags are skipped
#
# "#define RUBY_2 1" is also emitted when RUBY_VERSION >= '2.0'.
def create_conf_h(file) #:nodoc:
  print "creating #{file}\n"
  File.open(file, 'w') do |hfile|
    # Replace *every* character that cannot appear in a macro name, not just
    # the first whitespace/dot, so the guard is always a valid identifier.
    header_guard = file.upcase.gsub(/[^A-Z0-9_]/, '_')

    hfile.puts "#ifndef #{header_guard}"
    hfile.puts "#define #{header_guard}"
    hfile.puts

    # FIXME: Find a better way to do this:
    hfile.puts "#define RUBY_2 1" if RUBY_VERSION >= '2.0'

    $defs.each do |flag|
      match = /^-D(.+)/.match(flag)
      next if match.nil? # not a -D flag: nothing to define

      name, value = match[1].split('=', 2)
      next if name.nil? || name.empty?

      hfile.printf "#define %s %s\n", name, value || 1
    end

    hfile.puts
    hfile.puts "#endif"
  end
end
84
+
# Refuse to build on interpreters older than Ruby 1.9.
raise(NotImplementedError, "Sorry, you need at least Ruby 1.9!") if RUBY_VERSION < '1.9'

# Installation of the headers/static library is currently disabled:
#$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
#$INSTALLFILES << ['libnmatrix.a', '$(archdir)']   # (cygwin/mingw only)

# On cygwin/mingw, ask the linker to also emit an import library.
CONFIG["DLDFLAGS"] << " --output-lib libnmatrix.a" if RUBY_PLATFORM =~ /cygwin|mingw/

$DEBUG = true

#not the right way to add this include directory
# The same warning options and include path are prepended to all three flag sets.
nmatrix_flags = '-Wall -Werror=return-type -I$(srcdir)/../nmatrix'
$CFLAGS   = [nmatrix_flags, $CFLAGS].join(" ")
$CXXFLAGS = [nmatrix_flags, $CXXFLAGS].join(" ")
$CPPFLAGS = [nmatrix_flags, $CPPFLAGS].join(" ")

# When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb.
# Why not just autogenerate this list from all .c/.cpp files in directory?
basenames = %w{nmatrix_atlas math_atlas}
$objs = basenames.map { |name| name + ".o" }
$srcs = basenames.map { |name| name + ".cpp" }

# Compiler used for the extension.
#CONFIG['CXX'] = 'clang++'
CONFIG['CXX'] = 'g++'
112
+
# Looks for a versioned g++ binary (g++-4.9 down to g++-4.3) using `which`.
# The first one found is stored in CONFIG['CXX'], its version is printed,
# and the binary name is returned. Returns false when none is found.
def find_newer_gplusplus #:nodoc:
  print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
  9.downto(3) do |minor|
    release   = "4.#{minor}"
    candidate = "g++-#{release}"
    unless `which #{candidate}`.empty?
      CONFIG['CXX'] = candidate
      puts release
      return CONFIG['CXX']
    end
  end
  false
end
126
+
# Returns the version of the compiler named by CONFIG['CXX'] as a
# "major.minor.patch" string, read from its predefined __GNUC__,
# __GNUC_MINOR__ and __GNUC_PATCHLEVEL__ macros.
#
# Raises a RuntimeError if any of the three macros cannot be read.
def gplusplus_version
  major, minor, patch = %w{__GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__}.map do |macro|
    # Dump the predefined macros, keep the matching line, take its value field.
    `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{macro}`.chomp.split(' ')[2]
  end

  if [major, minor, patch].any? { |part| part.nil? }
    raise("unable to determine g++ version (match to get version was nil)")
  end

  [major, minor, patch].join(".")
end
137
+
138
+
# Choose the -std= value ($CPP_STANDARD) for the configured compiler.
#
# clang++ always gets c++11. For g++ the reported version decides:
# older than 4.3.0 -> try to locate a newer versioned g++ binary (or abort),
# older than 4.7.0 -> c++0x, anything else -> c++11.
if CONFIG['CXX'] == 'clang++'
  $CPP_STANDARD = 'c++11'

else
  # Versions must be compared numerically: as plain strings "10.2.0" sorts
  # before "4.3.0", which made every g++ >= 10 look too old.
  require "rubygems" # provides Gem::Version
  as_version = lambda { |str| Gem::Version.new(str) }

  version = gplusplus_version
  if as_version.call(version) < as_version.call('4.3.0') && CONFIG['CXX'] == 'g++' # see if we can find a newer G++, unless it's been overridden by user
    if !find_newer_gplusplus
      raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
    end
    version = gplusplus_version
  end

  if as_version.call(version) < as_version.call('4.7.0')
    $CPP_STANDARD = 'c++0x'
  else
    $CPP_STANDARD = 'c++11'
  end
  puts "using C++ standard... #{$CPP_STANDARD}"
  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
end
159
+
160
+ # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
161
+
162
+ # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
163
+ # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
164
+ # for ATLAS. The same for all the others
165
+ #
166
+ #dir_config("clapack", ["/usr/local/atlas/include"], [])
167
+ #
168
+ #
169
+
170
+ # Is g++ having trouble finding your header files?
171
+ # Try this:
172
+ # export C_INCLUDE_PATH=/usr/local/atlas/include
173
+ # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include
174
+ # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12
175
+
# Default include directories passed to dir_config for each library.
idefaults = {lapack: ["/usr/include/atlas"],
             cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
             atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}

# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
# that each directory exists before offering it as a default.
#
# Dir.exist? is used rather than Dir.exists?, which was removed in Ruby 3.2.
existing_dirs = lambda { |dirs| dirs.select { |d| Dir.exist?(d) } }

# Default library directories passed to dir_config for each library.
ldefaults = {lapack: existing_dirs.call(["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"]),
             cblas: existing_dirs.call(["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"]),
             atlas: existing_dirs.call(["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"])}
186
+
# Usually only applies for Mac OS X
$libs += " -lclapack " if have_library("clapack")

# For every library that cannot be linked from the default search path,
# call dir_config with the default include/lib locations computed above.
[:lapack, :cblas, :atlas].each do |lib|
  next if have_library(lib.to_s)
  dir_config(lib.to_s, idefaults[lib], ldefaults[lib])
end

# If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
# we try our luck with the default location.
found_atlas_cblas = have_header("atlas/cblas.h")
remaining_headers = found_atlas_cblas ? %w{atlas/clapack.h} : %w{cblas.h clapack.h}
remaining_headers.each { |header| have_header(header) }
211
+
212
+
# Although have_func is supposed to take a list as its second argument, I find that it simply
# applies a :to_s to the second arg and doesn't actually check each one. We may want to put
# have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and
# lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. --@mohawkjohn 8/20/14
[
  ["clapack_dgetrf", "atlas/clapack.h"],
  ["clapack_dgetri", "atlas/clapack.h"],
  ["dgesvd_",        "clapack.h"], # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
  ["cblas_dgemm",    "cblas.h"]
].each do |function, header|
  have_func(function, header)
end

#have_func("rb_scan_args", "ruby.h")

#find_library("lapack", "clapack_dgetrf")
#find_library("cblas", "cblas_dgemm")
#find_library("atlas", "ATL_dgemmNN")

# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
$libs += " -llapack -lcblas -latlas "
#$libs += " -lprofiler "
231
+
232
+
# For release, these next two should both be changed to -O3.
$CFLAGS += " -O3" #" -O0 -g "
#$CFLAGS += " -static -O0 -g "
$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD}" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
#$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "

# Strip warning flags that do not apply to this C++ build.
# CONFIG['warnflags'] is not guaranteed to be set, so guard against nil
# instead of crashing with NoMethodError. The string is edited in place
# (gsub!) exactly as before, so any other holder of the same String object
# presumably sees the change too -- do not replace this with a reassignment
# without checking how mkmf reads the value.
unless CONFIG['warnflags'].nil?
  [
    '-Wshorten-64-to-32',              # doesn't work except in Mac-patched gcc (4.2)
    '-Wdeclaration-after-statement',
    '-Wimplicit-function-declaration'
  ].each do |flag|
    CONFIG['warnflags'].gsub!(flag, '')
  end
end
242
+
# Emit the configuration header and the Makefile for the extension.
create_conf_h("nmatrix_atlas_config.h")
create_makefile("nmatrix_atlas")

# to clean up object files in subdirectories:
# (the list of subdirectories is currently empty)
object_subdirs = %w{ }
File.open('Makefile', 'a') do |makefile|
  patterns = object_subdirs.map { |dir| "#{dir}/*.#{CONFIG["OBJEXT"]}" }
  makefile.write("CLEANOBJS := $(CLEANOBJS) #{patterns.join(' ')}")
end
@@ -0,0 +1,1206 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math_atlas.cpp
25
+ //
26
+ // Ruby-exposed CBLAS and LAPACK functions that call ATLAS
27
+ // functions.
28
+ //
29
+
30
+ /*
31
+ * Project Includes
32
+ */
33
+
34
+ #include "data/data.h"
35
+
36
+ #include "math_atlas/inc.h"
37
+
38
+ #include "math/util.h"
39
+
40
+ //BLAS
41
+ #include "math_atlas/cblas_templates_atlas.h"
42
+
43
+ //LAPACK
44
+ #include "math/laswp.h"
45
+ #include "math_atlas/clapack_templates.h"
46
+
47
+ #include "math_atlas/gesvd.h"
48
+ #include "math_atlas/gesdd.h"
49
+ #include "math_atlas/geev.h"
50
+
51
+
52
+ /*
53
+ * Forward Declarations
54
+ */
55
+
// Forward declarations of the file-local functions that nm_math_init_atlas
// registers as Ruby singleton methods. In every signature the first
// parameter is the Ruby receiver (self); the remaining VALUE parameters are
// the Ruby-level arguments, so the arity given at registration is the
// parameter count minus one.
extern "C" {
  /* BLAS Level 1. */
  static VALUE nm_atlas_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
  static VALUE nm_atlas_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
  static VALUE nm_atlas_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
  static VALUE nm_atlas_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
  static VALUE nm_atlas_cblas_rotg(VALUE self, VALUE ab);
  static VALUE nm_atlas_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);

  /* BLAS Level 2. */
  static VALUE nm_atlas_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
                                   VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);

  /* BLAS Level 3. */
  static VALUE nm_atlas_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
                                   VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
  static VALUE nm_atlas_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
                                   VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
  static VALUE nm_atlas_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
                                   VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
  static VALUE nm_atlas_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
                                   VALUE lda, VALUE beta, VALUE c, VALUE ldc);
  static VALUE nm_atlas_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
                                   VALUE lda, VALUE beta, VALUE c, VALUE ldc);

  /* LAPACK. */
  // Bindings registered under the "clapack_" prefix.
  static VALUE nm_atlas_has_clapack(VALUE self);
  static VALUE nm_atlas_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
  static VALUE nm_atlas_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
  static VALUE nm_atlas_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
  static VALUE nm_atlas_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
  static VALUE nm_atlas_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
  static VALUE nm_atlas_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
  static VALUE nm_atlas_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);

  // Bindings registered under the "lapack_" prefix (SVD and eigenvalue routines).
  static VALUE nm_atlas_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
  static VALUE nm_atlas_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size);
  static VALUE nm_atlas_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork);
}
95
+
96
+ ////////////////////
97
+ // Math Functions //
98
+ ////////////////////
99
+
100
+ namespace nm {
101
+ namespace math {
102
+ namespace atlas {
103
+
104
+ /*
105
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
106
+ */
107
+ template <typename DType, typename CType>
108
+ inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) {
109
+ return gesvd<DType,CType>(jobu, jobvt, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<CType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, reinterpret_cast<CType*>(rwork));
110
+ }
111
+
112
+ /*
113
+ * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
114
+ */
115
+ template <typename DType, typename CType>
116
+ inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) {
117
+ return gesdd<DType,CType>(jobz, m, n, reinterpret_cast<DType*>(a), lda, reinterpret_cast<CType*>(s), reinterpret_cast<DType*>(u), ldu, reinterpret_cast<DType*>(vt), ldvt, reinterpret_cast<DType*>(work), lwork, iwork, reinterpret_cast<CType*>(rwork));
118
+ }
119
+
120
+
121
+ }
122
+ }
123
+ }
124
+
125
+ extern "C" {
126
+
127
+ ///////////////////
128
+ // Ruby Bindings //
129
+ ///////////////////
130
+
131
+ void nm_math_init_atlas() {
132
+ VALUE cNMatrix_ATLAS = rb_define_module_under(cNMatrix, "ATLAS");
133
+
134
+ rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_atlas_has_clapack, 0);
135
+
136
+ VALUE cNMatrix_ATLAS_LAPACK = rb_define_module_under(cNMatrix_ATLAS, "LAPACK");
137
+
138
+ /* ATLAS-CLAPACK Functions */
139
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getrf", (METHOD)nm_atlas_clapack_getrf, 5);
140
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potrf", (METHOD)nm_atlas_clapack_potrf, 5);
141
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getrs", (METHOD)nm_atlas_clapack_getrs, 9);
142
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potrs", (METHOD)nm_atlas_clapack_potrs, 8);
143
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getri", (METHOD)nm_atlas_clapack_getri, 5);
144
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potri", (METHOD)nm_atlas_clapack_potri, 5);
145
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_laswp", (METHOD)nm_atlas_clapack_laswp, 7);
146
+
147
+ /* Non-ATLAS regular LAPACK Functions called via Fortran interface */
148
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_gesvd", (METHOD)nm_atlas_lapack_gesvd, 12);
149
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_gesdd", (METHOD)nm_atlas_lapack_gesdd, 11);
150
+ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_geev", (METHOD)nm_atlas_lapack_geev, 12);
151
+
152
+ VALUE cNMatrix_ATLAS_BLAS = rb_define_module_under(cNMatrix_ATLAS, "BLAS");
153
+
154
+ //BLAS Level 1
155
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_scal", (METHOD)nm_atlas_cblas_scal, 4);
156
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_nrm2", (METHOD)nm_atlas_cblas_nrm2, 3);
157
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_asum", (METHOD)nm_atlas_cblas_asum, 3);
158
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_rot", (METHOD)nm_atlas_cblas_rot, 7);
159
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_rotg", (METHOD)nm_atlas_cblas_rotg, 1);
160
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_imax", (METHOD)nm_atlas_cblas_imax, 3);
161
+
162
+ //BLAS Level 2
163
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_gemv", (METHOD)nm_atlas_cblas_gemv, 11);
164
+
165
+ //BLAS Level 3
166
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_gemm", (METHOD)nm_atlas_cblas_gemm, 14);
167
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_trsm", (METHOD)nm_atlas_cblas_trsm, 12);
168
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_trmm", (METHOD)nm_atlas_cblas_trmm, 12);
169
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_syrk", (METHOD)nm_atlas_cblas_syrk, 11);
170
+ rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_herk", (METHOD)nm_atlas_cblas_herk, 11);
171
+
172
+ }
173
+
174
+ /*
175
+ * Simple way to check from within Ruby code if clapack functions are available, without
176
+ * having to wait around for an exception to be thrown.
177
+ */
178
+ static VALUE nm_atlas_has_clapack(VALUE self) {
179
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
180
+ return Qtrue;
181
+ #else
182
+ return Qfalse;
183
+ #endif
184
+ }
185
+
186
+ /*
187
+ * call-seq:
188
+ * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
189
+ *
190
+ * BLAS level 1 function +scal+. Works with all dtypes.
191
+ *
192
+ * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
193
+ * follows:
194
+ * x <- alpha * x
195
+ *
196
+ * - +n+ -> Number of elements of +vector+.
197
+ * - +alpha+ -> Scalar value used in the operation.
198
+ * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
199
+ * - +inc+ -> Increment used in the scaling function. Should generally be 1.
200
+ */
201
+ static VALUE nm_atlas_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
202
+ nm::dtype_t dtype = NM_DTYPE(vector);
203
+
204
+ void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
205
+ rubyval_to_cval(alpha, dtype, scalar);
206
+
207
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_scal, void, const int n,
208
+ const void* scalar, void* x, const int incx);
209
+
210
+ ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
211
+ FIX2INT(incx));
212
+
213
+ return vector;
214
+ }
215
+
216
+ /*
217
+ * Call any of the cblas_xrotg functions as directly as possible.
218
+ *
219
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
220
+ *
221
+ * | c s | | a | | r |
222
+ * | -s c | * | b | = | 0 |
223
+ *
224
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
225
+ *
226
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
227
+ *
228
+ * This function differs from most of the other raw BLAS accessors. Instead of
229
+ * providing a, b, c, s as arguments, you should only provide a and b (the
230
+ * inputs), and you should provide them as the first two elements of any dense
231
+ * NMatrix type.
232
+ *
233
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input
234
+ * NMatrix will also be modified in-place.
235
+ *
236
+ * This function, like the other cblas_ functions, does minimal type-checking.
237
+ */
238
+ static VALUE nm_atlas_cblas_rotg(VALUE self, VALUE ab) {
239
+ static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
240
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
241
+ nm::math::atlas::cblas_rotg<float>,
242
+ nm::math::atlas::cblas_rotg<double>,
243
+ nm::math::atlas::cblas_rotg<nm::Complex64>,
244
+ nm::math::atlas::cblas_rotg<nm::Complex128>,
245
+ NULL //nm::math::atlas::cblas_rotg<nm::RubyObject>
246
+ };
247
+
248
+ nm::dtype_t dtype = NM_DTYPE(ab);
249
+
250
+ if (!ttable[dtype]) {
251
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
252
+ return Qnil;
253
+
254
+ } else {
255
+ NM_CONSERVATIVE(nm_register_value(&self));
256
+ NM_CONSERVATIVE(nm_register_value(&ab));
257
+ void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
258
+ *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
259
+
260
+ // extract A and B from the NVector (first two elements)
261
+ void* pA = NM_STORAGE_DENSE(ab)->elements;
262
+ void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
263
+ // c and s are output
264
+
265
+ ttable[dtype](pA, pB, pC, pS);
266
+
267
+ VALUE result = rb_ary_new2(2);
268
+
269
+ if (dtype == nm::RUBYOBJ) {
270
+ rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
271
+ rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
272
+ } else {
273
+ rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
274
+ rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
275
+ }
276
+ NM_CONSERVATIVE(nm_unregister_value(&ab));
277
+ NM_CONSERVATIVE(nm_unregister_value(&self));
278
+ return result;
279
+ }
280
+ }
281
+
282
+
283
+ /*
284
+ * Call any of the cblas_xrot functions as directly as possible.
285
+ *
286
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
287
+ *
288
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
289
+ * * n :: number of elements to consider in x and y
290
+ * * x :: a vector (expects an NVector)
291
+ * * incx :: stride of x
292
+ * * y :: a vector (expects an NVector)
293
+ * * incy :: stride of y
294
+ * * c :: cosine of the angle of rotation
295
+ * * s :: sine of the angle of rotation
296
+ *
297
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
298
+ * will be float for Complex64 or double for Complex128.
299
+ *
300
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
301
+ * with its arguments?
302
+ *
303
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
304
+ * handling, so you can easily crash Ruby!
305
+ */
306
+ static VALUE nm_atlas_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
307
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
308
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
309
+ nm::math::atlas::cblas_rot<float,float>,
310
+ nm::math::atlas::cblas_rot<double,double>,
311
+ nm::math::atlas::cblas_rot<nm::Complex64,float>,
312
+ nm::math::atlas::cblas_rot<nm::Complex128,double>,
313
+ nm::math::atlas::cblas_rot<nm::RubyObject,nm::RubyObject>
314
+ };
315
+
316
+ nm::dtype_t dtype = NM_DTYPE(x);
317
+
318
+
319
+ if (!ttable[dtype]) {
320
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
321
+ return Qfalse;
322
+ } else {
323
+ void *pC, *pS;
324
+
325
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
326
+ if (dtype == nm::COMPLEX64) {
327
+ pC = NM_ALLOCA_N(float,1);
328
+ pS = NM_ALLOCA_N(float,1);
329
+ rubyval_to_cval(c, nm::FLOAT32, pC);
330
+ rubyval_to_cval(s, nm::FLOAT32, pS);
331
+ } else if (dtype == nm::COMPLEX128) {
332
+ pC = NM_ALLOCA_N(double,1);
333
+ pS = NM_ALLOCA_N(double,1);
334
+ rubyval_to_cval(c, nm::FLOAT64, pC);
335
+ rubyval_to_cval(s, nm::FLOAT64, pS);
336
+ } else {
337
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
338
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
339
+ rubyval_to_cval(c, dtype, pC);
340
+ rubyval_to_cval(s, dtype, pS);
341
+ }
342
+
343
+
344
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
345
+
346
+ return Qtrue;
347
+ }
348
+ }
349
+
350
+
351
+ /*
352
+ * Call any of the cblas_xnrm2 functions as directly as possible.
353
+ *
354
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
355
+ *
356
+ * Arguments:
357
+ * * n :: length of x, must be at least 0
358
+ * * x :: pointer to first entry of input vector
359
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
360
+ *
361
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
362
+ * with its arguments?
363
+ *
364
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
365
+ * handling, so you can easily crash Ruby!
366
+ */
367
+ static VALUE nm_atlas_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
368
+
369
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
370
+ NULL, NULL, NULL, NULL, NULL, // no help for integers
371
+ nm::math::atlas::cblas_nrm2<float32_t,float32_t>,
372
+ nm::math::atlas::cblas_nrm2<float64_t,float64_t>,
373
+ nm::math::atlas::cblas_nrm2<float32_t,nm::Complex64>,
374
+ nm::math::atlas::cblas_nrm2<float64_t,nm::Complex128>,
375
+ nm::math::atlas::cblas_nrm2<nm::RubyObject,nm::RubyObject>
376
+ };
377
+
378
+ nm::dtype_t dtype = NM_DTYPE(x);
379
+
380
+ if (!ttable[dtype]) {
381
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
382
+ return Qnil;
383
+
384
+ } else {
385
+ // Determine the return dtype and allocate it
386
+ nm::dtype_t rdtype = dtype;
387
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
388
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
389
+
390
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
391
+
392
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
393
+
394
+ return rubyobj_from_cval(Result, rdtype).rval;
395
+ }
396
+ }
397
+
398
+
399
+
400
+ /*
401
+ * Call any of the cblas_xasum functions as directly as possible.
402
+ *
403
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
404
+ * of a vector x.
405
+ *
406
+ * Arguments:
407
+ * * n :: length of x, must be at least 0
408
+ * * x :: pointer to first entry of input vector
409
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
410
+ *
411
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
412
+ * with its arguments?
413
+ *
414
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
415
+ * handling, so you can easily crash Ruby!
416
+ */
417
+ static VALUE nm_atlas_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
418
+
419
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
420
+ nm::math::atlas::cblas_asum<uint8_t,uint8_t>,
421
+ nm::math::atlas::cblas_asum<int8_t,int8_t>,
422
+ nm::math::atlas::cblas_asum<int16_t,int16_t>,
423
+ nm::math::atlas::cblas_asum<int32_t,int32_t>,
424
+ nm::math::atlas::cblas_asum<int64_t,int64_t>,
425
+ nm::math::atlas::cblas_asum<float32_t,float32_t>,
426
+ nm::math::atlas::cblas_asum<float64_t,float64_t>,
427
+ nm::math::atlas::cblas_asum<float32_t,nm::Complex64>,
428
+ nm::math::atlas::cblas_asum<float64_t,nm::Complex128>,
429
+ nm::math::atlas::cblas_asum<nm::RubyObject,nm::RubyObject>
430
+ };
431
+
432
+ nm::dtype_t dtype = NM_DTYPE(x);
433
+
434
+ // Determine the return dtype and allocate it
435
+ nm::dtype_t rdtype = dtype;
436
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
437
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
438
+
439
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
440
+
441
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
442
+
443
+ return rubyobj_from_cval(Result, rdtype).rval;
444
+ }
445
+
446
+ /*
447
+ * call-seq:
448
+ * NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
449
+ *
450
+ * BLAS level 1 routine.
451
+ *
452
+ * Return the index of the largest element of +vector+.
453
+ *
454
+ * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
455
+ * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
456
+ * - +inc+ -> It's the increment used when searching. Use 1 except if you know
457
+ * what you're doing.
458
+ */
459
+ static VALUE nm_atlas_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
460
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_imax, int, const int n, const void* x, const int incx);
461
+
462
+ nm::dtype_t dtype = NM_DTYPE(x);
463
+
464
+ int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));
465
+
466
+ // Convert to Ruby's Int value.
467
+ return INT2FIX(index);
468
+ }
469
+
470
+ /* Call any of the cblas_xgemv functions as directly as possible.
471
+ *
472
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
473
+ *
474
+ * y = alpha*op(A)*x + beta*y
475
+ *
476
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
477
+ *
478
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
479
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
480
+ * expose the ultra-optimized ATLAS versions.
481
+ *
482
+ * == Arguments
483
+ * See: http://www.netlib.org/blas/dgemm.f
484
+ *
485
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
486
+ * with its arguments?
487
+ *
488
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
489
+ * handling, so you can easily crash Ruby!
490
+ */
491
+ static VALUE nm_atlas_cblas_gemv(VALUE self,
492
+ VALUE trans_a,
493
+ VALUE m, VALUE n,
494
+ VALUE alpha,
495
+ VALUE a, VALUE lda,
496
+ VALUE x, VALUE incx,
497
+ VALUE beta,
498
+ VALUE y, VALUE incy)
499
+ {
500
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
501
+
502
+ nm::dtype_t dtype = NM_DTYPE(a);
503
+
504
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
505
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
506
+ rubyval_to_cval(alpha, dtype, pAlpha);
507
+ rubyval_to_cval(beta, dtype, pBeta);
508
+
509
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
510
+ }
511
+
512
+ /* Call any of the cblas_xgemm functions as directly as possible.
513
+ *
514
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
515
+ *
516
+ * C = alpha*op(A)*op(B) + beta*C
517
+ *
518
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
519
+ *
520
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
521
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
522
+ * expose the ultra-optimized ATLAS versions.
523
+ *
524
+ * == Arguments
525
+ * See: http://www.netlib.org/blas/dgemm.f
526
+ *
527
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
528
+ * with its arguments?
529
+ *
530
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
531
+ * handling, so you can easily crash Ruby!
532
+ */
533
+ static VALUE nm_atlas_cblas_gemm(VALUE self,
534
+ VALUE order,
535
+ VALUE trans_a, VALUE trans_b,
536
+ VALUE m, VALUE n, VALUE k,
537
+ VALUE alpha,
538
+ VALUE a, VALUE lda,
539
+ VALUE b, VALUE ldb,
540
+ VALUE beta,
541
+ VALUE c, VALUE ldc)
542
+ {
543
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
544
+
545
+ nm::dtype_t dtype = NM_DTYPE(a);
546
+
547
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
548
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
549
+ rubyval_to_cval(alpha, dtype, pAlpha);
550
+ rubyval_to_cval(beta, dtype, pBeta);
551
+
552
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
553
+
554
+ return c;
555
+ }
556
+
557
+
558
+ static VALUE nm_atlas_cblas_trsm(VALUE self,
559
+ VALUE order,
560
+ VALUE side, VALUE uplo,
561
+ VALUE trans_a, VALUE diag,
562
+ VALUE m, VALUE n,
563
+ VALUE alpha,
564
+ VALUE a, VALUE lda,
565
+ VALUE b, VALUE ldb)
566
+ {
567
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
568
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
569
+ const int m, const int n, const void* alpha, const void* a,
570
+ const int lda, void* b, const int ldb) = {
571
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
572
+ nm::math::atlas::cblas_trsm<float>,
573
+ nm::math::atlas::cblas_trsm<double>,
574
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
575
+ nm::math::atlas::cblas_trsm<nm::RubyObject>
576
+ };
577
+
578
+ nm::dtype_t dtype = NM_DTYPE(a);
579
+
580
+ if (!ttable[dtype]) {
581
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
582
+ } else {
583
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
584
+ rubyval_to_cval(alpha, dtype, pAlpha);
585
+
586
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
587
+ }
588
+
589
+ return Qtrue;
590
+ }
591
+
592
+ static VALUE nm_atlas_cblas_trmm(VALUE self,
593
+ VALUE order,
594
+ VALUE side, VALUE uplo,
595
+ VALUE trans_a, VALUE diag,
596
+ VALUE m, VALUE n,
597
+ VALUE alpha,
598
+ VALUE a, VALUE lda,
599
+ VALUE b, VALUE ldb)
600
+ {
601
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
602
+ const enum CBLAS_SIDE, const enum CBLAS_UPLO,
603
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
604
+ const int m, const int n, const void* alpha, const void* a,
605
+ const int lda, void* b, const int ldb) = {
606
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
607
+ nm::math::atlas::cblas_trmm<float>,
608
+ nm::math::atlas::cblas_trmm<double>,
609
+ cblas_ctrmm, cblas_ztrmm, // call directly, same function signature!
610
+ NULL
611
+ };
612
+
613
+ nm::dtype_t dtype = NM_DTYPE(a);
614
+
615
+ if (!ttable[dtype]) {
616
+ rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
617
+ } else {
618
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
619
+ rubyval_to_cval(alpha, dtype, pAlpha);
620
+
621
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
622
+ }
623
+
624
+ return b;
625
+ }
626
+
627
+ static VALUE nm_atlas_cblas_syrk(VALUE self,
628
+ VALUE order,
629
+ VALUE uplo,
630
+ VALUE trans,
631
+ VALUE n, VALUE k,
632
+ VALUE alpha,
633
+ VALUE a, VALUE lda,
634
+ VALUE beta,
635
+ VALUE c, VALUE ldc)
636
+ {
637
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
638
+ const int n, const int k, const void* alpha, const void* a,
639
+ const int lda, const void* beta, void* c, const int ldc) = {
640
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
641
+ nm::math::atlas::cblas_syrk<float>,
642
+ nm::math::atlas::cblas_syrk<double>,
643
+ cblas_csyrk, cblas_zsyrk, // call directly, same function signature!
644
+ NULL
645
+ };
646
+
647
+ nm::dtype_t dtype = NM_DTYPE(a);
648
+
649
+ if (!ttable[dtype]) {
650
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
651
+ } else {
652
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
653
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
654
+ rubyval_to_cval(alpha, dtype, pAlpha);
655
+ rubyval_to_cval(beta, dtype, pBeta);
656
+
657
+ ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
658
+ }
659
+
660
+ return Qtrue;
661
+ }
662
+
663
+ static VALUE nm_atlas_cblas_herk(VALUE self,
664
+ VALUE order,
665
+ VALUE uplo,
666
+ VALUE trans,
667
+ VALUE n, VALUE k,
668
+ VALUE alpha,
669
+ VALUE a, VALUE lda,
670
+ VALUE beta,
671
+ VALUE c, VALUE ldc)
672
+ {
673
+
674
+ nm::dtype_t dtype = NM_DTYPE(a);
675
+
676
+ if (dtype == nm::COMPLEX64) {
677
+ cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
678
+ } else if (dtype == nm::COMPLEX128) {
679
+ cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
680
+ } else
681
+ rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
682
+ return Qtrue;
683
+ }
684
+
685
+ /*
686
+ * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
687
+ *
688
+ * xGESVD computes the singular value decomposition (SVD) of a real
689
+ * M-by-N matrix A, optionally computing the left and/or right singular
690
+ * vectors. The SVD is written
691
+ *
692
+ * A = U * SIGMA * transpose(V)
693
+ *
694
+ * where SIGMA is an M-by-N matrix which is zero except for its
695
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
696
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
697
+ * are the singular values of A; they are real and non-negative, and
698
+ * are returned in descending order. The first min(m,n) columns of
699
+ * U and V are the left and right singular vectors of A.
700
+ *
701
+ * Note that the routine returns V**T, not V.
702
+ */
703
+ static VALUE nm_atlas_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
704
+ static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
705
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
706
+ nm::math::atlas::lapack_gesvd<float,float>,
707
+ nm::math::atlas::lapack_gesvd<double,double>,
708
+ nm::math::atlas::lapack_gesvd<nm::Complex64,float>,
709
+ nm::math::atlas::lapack_gesvd<nm::Complex128,double>,
710
+ NULL // no Ruby objects
711
+ };
712
+
713
+ nm::dtype_t dtype = NM_DTYPE(a);
714
+
715
+
716
+ if (!gesvd_table[dtype]) {
717
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
718
+ return Qfalse;
719
+ } else {
720
+ int M = FIX2INT(m),
721
+ N = FIX2INT(n);
722
+
723
+ int min_mn = NM_MIN(M,N);
724
+ int max_mn = NM_MAX(M,N);
725
+
726
+ char JOBU = lapack_svd_job_sym(jobu),
727
+ JOBVT = lapack_svd_job_sym(jobvt);
728
+
729
+ // only need rwork for complex matrices
730
+ int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
731
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
732
+ int work_size = FIX2INT(lwork);
733
+
734
+ // ignore user argument for lwork if it's too small.
735
+ work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
736
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
737
+
738
+ int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
739
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
740
+ work, work_size, rwork);
741
+ return INT2FIX(info);
742
+ }
743
+ }
744
+
745
+ /*
746
+ * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
747
+ *
748
+ * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
749
+ * M-by-N matrix A, optionally computing the left and/or right singular
750
+ * vectors. The SVD is written
751
+ *
752
+ * A = U * SIGMA * transpose(V)
753
+ *
754
+ * where SIGMA is an M-by-N matrix which is zero except for its
755
+ * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
756
+ * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
757
+ * are the singular values of A; they are real and non-negative, and
758
+ * are returned in descending order. The first min(m,n) columns of
759
+ * U and V are the left and right singular vectors of A.
760
+ *
761
+ * Note that the routine returns V**T, not V.
762
+ */
763
+ static VALUE nm_atlas_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
764
+ static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
765
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
766
+ nm::math::atlas::lapack_gesdd<float,float>,
767
+ nm::math::atlas::lapack_gesdd<double,double>,
768
+ nm::math::atlas::lapack_gesdd<nm::Complex64,float>,
769
+ nm::math::atlas::lapack_gesdd<nm::Complex128,double>,
770
+ NULL // no Ruby objects
771
+ };
772
+
773
+ nm::dtype_t dtype = NM_DTYPE(a);
774
+
775
+ if (!gesdd_table[dtype]) {
776
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
777
+ return Qfalse;
778
+ } else {
779
+ int M = FIX2INT(m),
780
+ N = FIX2INT(n);
781
+
782
+ int min_mn = NM_MIN(M,N);
783
+ int max_mn = NM_MAX(M,N);
784
+
785
+ char JOBZ = lapack_svd_job_sym(jobz);
786
+
787
+ // only need rwork for complex matrices
788
+ void* rwork = NULL;
789
+
790
+ int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
791
+ if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
792
+ int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
793
+ rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
794
+
795
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
796
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
797
+ else work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
798
+ } else {
799
+ if (JOBZ == 'N') work_size = NM_MAX(work_size, 2*min_mn + max_mn);
800
+ else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
801
+ else work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
802
+ }
803
+ void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
804
+ int* iwork = NM_ALLOCA_N(int, 8*min_mn);
805
+
806
+ int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
807
+ NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
808
+ work, work_size, iwork, rwork);
809
+ return INT2FIX(info);
810
+ }
811
+ }
812
+
813
+ /*
814
+ * Function signature conversion for calling CBLAS' geev functions as directly as possible.
815
+ *
816
+ * GEEV computes for an N-by-N real nonsymmetric matrix A, the
817
+ * eigenvalues and, optionally, the left and/or right eigenvectors.
818
+ *
819
+ * The right eigenvector v(j) of A satisfies
820
+ * A * v(j) = lambda(j) * v(j)
821
+ * where lambda(j) is its eigenvalue.
822
+ *
823
+ * The left eigenvector u(j) of A satisfies
824
+ * u(j)**H * A = lambda(j) * u(j)**H
825
+ * where u(j)**H denotes the conjugate transpose of u(j).
826
+ *
827
+ * The computed eigenvectors are normalized to have Euclidean norm
828
+ * equal to 1 and largest component real.
829
+ */
830
+ static VALUE nm_atlas_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
831
+ static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
832
+ NULL, NULL, NULL, NULL, NULL, // no integer ops
833
+ nm::math::atlas::lapack_geev<float,float>,
834
+ nm::math::atlas::lapack_geev<double,double>,
835
+ nm::math::atlas::lapack_geev<nm::Complex64,float>,
836
+ nm::math::atlas::lapack_geev<nm::Complex128,double>,
837
+ NULL // no Ruby objects
838
+ };
839
+
840
+ nm::dtype_t dtype = NM_DTYPE(a);
841
+
842
+
843
+ if (!geev_table[dtype]) {
844
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
845
+ return Qfalse;
846
+ } else {
847
+ int N = FIX2INT(n);
848
+
849
+ char JOBVL = lapack_evd_job_sym(compute_left),
850
+ JOBVR = lapack_evd_job_sym(compute_right);
851
+
852
+ void* A = NM_STORAGE_DENSE(a)->elements;
853
+ void* WR = NM_STORAGE_DENSE(w)->elements;
854
+ void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
855
+ void* VL = JOBVL == 'V' ? NM_STORAGE_DENSE(vl)->elements : NULL;
856
+ void* VR = JOBVR == 'V' ? NM_STORAGE_DENSE(vr)->elements : NULL;
857
+
858
+ // only need rwork for complex matrices (wi == Qnil for complex)
859
+ int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
860
+ void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
861
+ int work_size = FIX2INT(lwork);
862
+ void* work;
863
+
864
+ int info;
865
+
866
+ // if work size is 0 or -1, query.
867
+ if (work_size <= 0) {
868
+ work_size = -1;
869
+ work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
870
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
871
+ work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
872
+ // line above is basically: work_size = (int)(work[0]); // now have new work_size
873
+ NM_FREE(work);
874
+ if (info == 0)
875
+ rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
876
+ else return INT2FIX(info); // error of some kind on query!
877
+ }
878
+
879
+ // if work size is < 2*N, just set it to 2*N
880
+ if (work_size < 2*N) work_size = 2*N;
881
+ if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
882
+ work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
883
+ }
884
+
885
+ // Allocate work array for actual run
886
+ work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
887
+
888
+ // Perform the actual calculation.
889
+ info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
890
+
891
+ return INT2FIX(info);
892
+ }
893
+ }
894
+
895
+ /* Call any of the clapack_xgetrf functions as directly as possible.
896
+ *
897
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
898
+ * matrix A using partial pivoting with row interchanges.
899
+ *
900
+ * The factorization has the form:
901
+ * A = P * L * U
902
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
903
+ * and U is upper triangular (upper trapezoidal if m < n).
904
+ *
905
+ * This is the right-looking level 3 BLAS version of the algorithm.
906
+ *
907
+ * == Arguments
908
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
909
+ * (You don't need argument 5; this is the value returned by this function.)
910
+ *
911
+ * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
912
+ * with its arguments?
913
+ *
914
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
915
+ * handling, so you can easily crash Ruby!
916
+ *
917
+ * Returns an array giving the pivot indices (normally these are argument #5).
918
+ */
919
+ static VALUE nm_atlas_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
920
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
921
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
922
+ nm::math::atlas::clapack_getrf<float>,
923
+ nm::math::atlas::clapack_getrf<double>,
924
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
925
+ clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
926
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
927
+ nm::math::atlas::clapack_getrf<nm::Complex64>,
928
+ nm::math::atlas::clapack_getrf<nm::Complex128>,
929
+ #endif
930
+ nm::math::atlas::clapack_getrf<nm::RubyObject>
931
+ };
932
+
933
+ int M = FIX2INT(m),
934
+ N = FIX2INT(n);
935
+
936
+ // Allocate the pivot index array, which is of size MIN(M, N).
937
+ size_t ipiv_size = std::min(M,N);
938
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
939
+
940
+ if (!ttable[NM_DTYPE(a)]) {
941
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
942
+ } else {
943
+ // Call either our version of getrf or the LAPACK version.
944
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
945
+ }
946
+
947
+ // Result will be stored in a. We return ipiv as an array.
948
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
949
+ for (size_t i = 0; i < ipiv_size; ++i) {
950
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
951
+ }
952
+
953
+ return ipiv_array;
954
+ }
955
+
956
+
957
+ /* Call any of the clapack_xpotrf functions as directly as possible.
958
+ *
959
+ * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
960
+ * with its arguments?
961
+ *
962
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
963
+ * handling, so you can easily crash Ruby!
964
+ */
965
+ static VALUE nm_atlas_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
966
+ #if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
967
+ rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
968
+ #endif
969
+
970
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
971
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
972
+ nm::math::atlas::clapack_potrf<float>,
973
+ nm::math::atlas::clapack_potrf<double>,
974
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
975
+ clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
976
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
977
+ nm::math::atlas::clapack_potrf<nm::Complex64>,
978
+ nm::math::atlas::clapack_potrf<nm::Complex128>,
979
+ #endif
980
+ NULL
981
+ };
982
+
983
+ if (!ttable[NM_DTYPE(a)]) {
984
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
985
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
986
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
987
+ } else {
988
+ // Call either our version of potrf or the LAPACK version.
989
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
990
+ }
991
+
992
+ return a;
993
+ }
994
+
995
+
996
+ /*
997
+ * Call any of the clapack_xgetrs functions as directly as possible.
998
+ */
999
+ static VALUE nm_atlas_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
1000
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
1001
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
1002
+ const int ldb) = {
1003
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1004
+ nm::math::atlas::clapack_getrs<float>,
1005
+ nm::math::atlas::clapack_getrs<double>,
1006
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1007
+ clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
1008
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1009
+ nm::math::atlas::clapack_getrs<nm::Complex64>,
1010
+ nm::math::atlas::clapack_getrs<nm::Complex128>,
1011
+ #endif
1012
+ nm::math::atlas::clapack_getrs<nm::RubyObject>
1013
+ };
1014
+
1015
+ // Allocate the C version of the pivot index array
1016
+ int* ipiv_;
1017
+ if (TYPE(ipiv) != T_ARRAY) {
1018
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1019
+ } else {
1020
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1021
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1022
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1023
+ }
1024
+ }
1025
+
1026
+ if (!ttable[NM_DTYPE(a)]) {
1027
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1028
+ } else {
1029
+
1030
+ // Call either our version of getrs or the LAPACK version.
1031
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1032
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1033
+ }
1034
+
1035
+ // b is both returned and modified directly in the argument list.
1036
+ return b;
1037
+ }
1038
+
1039
+
1040
+ /*
1041
+ * Call any of the clapack_xpotrs functions as directly as possible.
1042
+ */
1043
+ static VALUE nm_atlas_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
1044
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1045
+ const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
1046
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1047
+ nm::math::atlas::clapack_potrs<float>,
1048
+ nm::math::atlas::clapack_potrs<double>,
1049
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1050
+ clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
1051
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1052
+ nm::math::atlas::clapack_potrs<nm::Complex64>,
1053
+ nm::math::atlas::clapack_potrs<nm::Complex128>,
1054
+ #endif
1055
+ nm::math::atlas::clapack_potrs<nm::RubyObject>
1056
+ };
1057
+
1058
+
1059
+ if (!ttable[NM_DTYPE(a)]) {
1060
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1061
+ } else {
1062
+
1063
+ // Call either our version of potrs or the LAPACK version.
1064
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1065
+ NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1066
+ }
1067
+
1068
+ // b is both returned and modified directly in the argument list.
1069
+ return b;
1070
+ }
1071
+
1072
+ /* Call any of the clapack_xgetri functions as directly as possible.
1073
+ *
1074
+ * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
1075
+ * with its arguments?
1076
+ *
1077
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1078
+ * handling, so you can easily crash Ruby!
1079
+ *
1080
+ * Returns an array giving the pivot indices (normally these are argument #5).
1081
+ */
1082
+ static VALUE nm_atlas_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
1083
+ #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1084
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1085
+ #endif
1086
+
1087
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
1088
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1089
+ nm::math::atlas::clapack_getri<float>,
1090
+ nm::math::atlas::clapack_getri<double>,
1091
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1092
+ clapack_cgetri, clapack_zgetri, // call directly, same function signature!
1093
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1094
+ nm::math::atlas::clapack_getri<nm::Complex64>,
1095
+ nm::math::atlas::clapack_getri<nm::Complex128>,
1096
+ #endif
1097
+ NULL
1098
+ };
1099
+
1100
+ // Allocate the C version of the pivot index array
1101
+ int* ipiv_;
1102
+ if (TYPE(ipiv) != T_ARRAY) {
1103
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1104
+ } else {
1105
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1106
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1107
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1108
+ }
1109
+ }
1110
+
1111
+ if (!ttable[NM_DTYPE(a)]) {
1112
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1113
+ // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
1114
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1115
+ } else {
1116
+ // Call either our version of getri or the LAPACK version.
1117
+ ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
1118
+ }
1119
+
1120
+ return a;
1121
+ }
1122
+
1123
+
1124
+ /* Call any of the clapack_xpotri functions as directly as possible.
1125
+ *
1126
+ * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
1127
+ * with its arguments?
1128
+ *
1129
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1130
+ * handling, so you can easily crash Ruby!
1131
+ */
1132
+ static VALUE nm_atlas_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
1133
+ #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
1134
+ rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
1135
+ #endif
1136
+
1137
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
1138
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1139
+ nm::math::atlas::clapack_potri<float>,
1140
+ nm::math::atlas::clapack_potri<double>,
1141
+ #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
1142
+ clapack_cpotri, clapack_zpotri, // call directly, same function signature!
1143
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
1144
+ nm::math::atlas::clapack_potri<nm::Complex64>,
1145
+ nm::math::atlas::clapack_potri<nm::Complex128>,
1146
+ #endif
1147
+ NULL
1148
+ };
1149
+
1150
+ if (!ttable[NM_DTYPE(a)]) {
1151
+ rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
1152
+ // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
1153
+ //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1154
+ } else {
1155
+ // Call either our version of getri or the LAPACK version.
1156
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
1157
+ }
1158
+
1159
+ return a;
1160
+ }
1161
+
1162
+
1163
+ /*
1164
+ * Call any of the clapack_xlaswp functions as directly as possible.
1165
+ *
1166
+ * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
1167
+ * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
1168
+ */
1169
+ static VALUE nm_atlas_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
1170
+ //We have actually never used the ATLAS version of laswp. For the time being
1171
+ //I will leave it like that and just always call the internal implementation.
1172
+ //I don't know if there is a good reason for this or not.
1173
+ //Maybe because our internal version swaps columns instead of rows.
1174
+ static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
1175
+ nm::math::clapack_laswp<uint8_t>,
1176
+ nm::math::clapack_laswp<int8_t>,
1177
+ nm::math::clapack_laswp<int16_t>,
1178
+ nm::math::clapack_laswp<int32_t>,
1179
+ nm::math::clapack_laswp<int64_t>,
1180
+ nm::math::clapack_laswp<float>,
1181
+ nm::math::clapack_laswp<double>,
1182
+ nm::math::clapack_laswp<nm::Complex64>,
1183
+ nm::math::clapack_laswp<nm::Complex128>,
1184
+ nm::math::clapack_laswp<nm::RubyObject>
1185
+ };
1186
+
1187
+ // Allocate the C version of the pivot index array
1188
+ int* ipiv_;
1189
+ if (TYPE(ipiv) != T_ARRAY) {
1190
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1191
+ } else {
1192
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1193
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1194
+ ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
1195
+ }
1196
+ }
1197
+
1198
+ // Call either our version of laswp or the LAPACK version.
1199
+ ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
1200
+
1201
+ // a is both returned and modified directly in the argument list.
1202
+ return a;
1203
+ }
1204
+
1205
+
1206
+ }