pnmatrix 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/binary_format.txt +53 -0
  3. data/ext/nmatrix/data/complex.h +388 -0
  4. data/ext/nmatrix/data/data.cpp +274 -0
  5. data/ext/nmatrix/data/data.h +651 -0
  6. data/ext/nmatrix/data/meta.h +64 -0
  7. data/ext/nmatrix/data/ruby_object.h +386 -0
  8. data/ext/nmatrix/extconf.rb +70 -0
  9. data/ext/nmatrix/math/asum.h +99 -0
  10. data/ext/nmatrix/math/cblas_enums.h +36 -0
  11. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  12. data/ext/nmatrix/math/gemm.h +241 -0
  13. data/ext/nmatrix/math/gemv.h +178 -0
  14. data/ext/nmatrix/math/getrf.h +255 -0
  15. data/ext/nmatrix/math/getrs.h +121 -0
  16. data/ext/nmatrix/math/imax.h +82 -0
  17. data/ext/nmatrix/math/laswp.h +165 -0
  18. data/ext/nmatrix/math/long_dtype.h +62 -0
  19. data/ext/nmatrix/math/magnitude.h +54 -0
  20. data/ext/nmatrix/math/math.h +751 -0
  21. data/ext/nmatrix/math/nrm2.h +165 -0
  22. data/ext/nmatrix/math/rot.h +117 -0
  23. data/ext/nmatrix/math/rotg.h +106 -0
  24. data/ext/nmatrix/math/scal.h +71 -0
  25. data/ext/nmatrix/math/trsm.h +336 -0
  26. data/ext/nmatrix/math/util.h +162 -0
  27. data/ext/nmatrix/math.cpp +1368 -0
  28. data/ext/nmatrix/nm_memory.h +60 -0
  29. data/ext/nmatrix/nmatrix.cpp +285 -0
  30. data/ext/nmatrix/nmatrix.h +476 -0
  31. data/ext/nmatrix/ruby_constants.cpp +151 -0
  32. data/ext/nmatrix/ruby_constants.h +106 -0
  33. data/ext/nmatrix/ruby_nmatrix.c +3130 -0
  34. data/ext/nmatrix/storage/common.cpp +77 -0
  35. data/ext/nmatrix/storage/common.h +183 -0
  36. data/ext/nmatrix/storage/dense/dense.cpp +1096 -0
  37. data/ext/nmatrix/storage/dense/dense.h +129 -0
  38. data/ext/nmatrix/storage/list/list.cpp +1628 -0
  39. data/ext/nmatrix/storage/list/list.h +138 -0
  40. data/ext/nmatrix/storage/storage.cpp +730 -0
  41. data/ext/nmatrix/storage/storage.h +99 -0
  42. data/ext/nmatrix/storage/yale/class.h +1139 -0
  43. data/ext/nmatrix/storage/yale/iterators/base.h +143 -0
  44. data/ext/nmatrix/storage/yale/iterators/iterator.h +131 -0
  45. data/ext/nmatrix/storage/yale/iterators/row.h +450 -0
  46. data/ext/nmatrix/storage/yale/iterators/row_stored.h +140 -0
  47. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +169 -0
  48. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +124 -0
  49. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  50. data/ext/nmatrix/storage/yale/yale.cpp +2074 -0
  51. data/ext/nmatrix/storage/yale/yale.h +203 -0
  52. data/ext/nmatrix/types.h +55 -0
  53. data/ext/nmatrix/util/io.cpp +279 -0
  54. data/ext/nmatrix/util/io.h +115 -0
  55. data/ext/nmatrix/util/sl_list.cpp +627 -0
  56. data/ext/nmatrix/util/sl_list.h +144 -0
  57. data/ext/nmatrix/util/util.h +78 -0
  58. data/lib/nmatrix/blas.rb +378 -0
  59. data/lib/nmatrix/cruby/math.rb +744 -0
  60. data/lib/nmatrix/enumerate.rb +253 -0
  61. data/lib/nmatrix/homogeneous.rb +241 -0
  62. data/lib/nmatrix/io/fortran_format.rb +138 -0
  63. data/lib/nmatrix/io/harwell_boeing.rb +221 -0
  64. data/lib/nmatrix/io/market.rb +263 -0
  65. data/lib/nmatrix/io/point_cloud.rb +189 -0
  66. data/lib/nmatrix/jruby/decomposition.rb +24 -0
  67. data/lib/nmatrix/jruby/enumerable.rb +13 -0
  68. data/lib/nmatrix/jruby/error.rb +4 -0
  69. data/lib/nmatrix/jruby/math.rb +501 -0
  70. data/lib/nmatrix/jruby/nmatrix_java.rb +840 -0
  71. data/lib/nmatrix/jruby/operators.rb +283 -0
  72. data/lib/nmatrix/jruby/slice.rb +264 -0
  73. data/lib/nmatrix/lapack_core.rb +181 -0
  74. data/lib/nmatrix/lapack_plugin.rb +44 -0
  75. data/lib/nmatrix/math.rb +953 -0
  76. data/lib/nmatrix/mkmf.rb +100 -0
  77. data/lib/nmatrix/monkeys.rb +137 -0
  78. data/lib/nmatrix/nmatrix.rb +1172 -0
  79. data/lib/nmatrix/rspec.rb +75 -0
  80. data/lib/nmatrix/shortcuts.rb +1163 -0
  81. data/lib/nmatrix/version.rb +39 -0
  82. data/lib/nmatrix/yale_functions.rb +118 -0
  83. data/lib/nmatrix.rb +28 -0
  84. data/spec/00_nmatrix_spec.rb +892 -0
  85. data/spec/01_enum_spec.rb +196 -0
  86. data/spec/02_slice_spec.rb +407 -0
  87. data/spec/03_nmatrix_monkeys_spec.rb +80 -0
  88. data/spec/2x2_dense_double.mat +0 -0
  89. data/spec/4x4_sparse.mat +0 -0
  90. data/spec/4x5_dense.mat +0 -0
  91. data/spec/blas_spec.rb +215 -0
  92. data/spec/elementwise_spec.rb +311 -0
  93. data/spec/homogeneous_spec.rb +100 -0
  94. data/spec/io/fortran_format_spec.rb +88 -0
  95. data/spec/io/harwell_boeing_spec.rb +98 -0
  96. data/spec/io/test.rua +9 -0
  97. data/spec/io_spec.rb +159 -0
  98. data/spec/lapack_core_spec.rb +482 -0
  99. data/spec/leakcheck.rb +16 -0
  100. data/spec/math_spec.rb +1363 -0
  101. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  102. data/spec/nmatrix_yale_spec.rb +286 -0
  103. data/spec/rspec_monkeys.rb +56 -0
  104. data/spec/rspec_spec.rb +35 -0
  105. data/spec/shortcuts_spec.rb +474 -0
  106. data/spec/slice_set_spec.rb +162 -0
  107. data/spec/spec_helper.rb +172 -0
  108. data/spec/stat_spec.rb +214 -0
  109. data/spec/test.pcd +20 -0
  110. data/spec/utm5940.mtx +83844 -0
  111. metadata +295 -0
@@ -0,0 +1,1368 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math.cpp
25
+ //
26
+ // Ruby-exposed CBLAS and LAPACK functions that are available without
27
+ // an external library.
28
+ //
29
+ // === Procedure for adding CBLAS functions to math.cpp/math.h:
30
+ //
31
+ // This procedure is written as if for a fictional function with double
32
+ // version dbacon, which we'll say is from CBLAS.
33
+ //
34
+ // 1. Write a default templated version which probably returns a boolean.
35
+ // Call it bacon, and put it in math.h.
36
+ //
37
+ // template <typename DType>
38
+ // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
39
+ // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
40
+ // }
41
+ //
42
+ // Make sure this is in namespace nm::math
43
+ //
44
+ // 2. In math.cpp, add a templated inline static version of the function which takes
45
+ // only void* pointers and uses static_cast to convert them to the
46
+ // proper dtype. This should also be in namespace nm::math
47
+ //
48
+ // This function may also need to switch m and n if these arguments are given.
49
+ //
50
+ // For an example, see cblas_gemm. This function should do nothing other than cast
51
+ // appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon
52
+ // all take void* only, and no other pointers that vary between functions, you can skip
53
+ // this particular step -- as we can call them directly using a custom function pointer
54
+ // array (same function signature!).
55
+ //
56
+ // This version of the function will be the one exposed through NMatrix::BLAS. We
57
+ // want it to be as close to the actual BLAS version of the function as possible,
58
+ // and with as few checks as possible.
59
+ //
60
+ // You will probably need a forward declaration in the extern "C" block.
61
+ //
62
+ // Note: In that case, the function you wrote in Step 1 should also take exactly the
63
+ // same arguments as cblas_xbacon. Otherwise Bad Things will happen.
64
+ //
65
+ // 3. In cblas_templates_core.h, add a default template like in step 1 (which will just
66
+ // call nm::math::bacon()) and also
67
+ // inline specialized versions of bacon for the different BLAS types.
68
+ // This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
69
+ // of bacon from whatever external library is available, as well as the internal version
70
+ // if an external version is not available. These functions will end up in a namespace
71
+ // like nm::math::atlas, but don't explicitly put them in a namespace, they will get
72
+ // put in the appropriate namespace when cblas_templates_core.h is included.
73
+ //
74
+ // template <typename DType>
75
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
76
+ // nm::math::bacon(trans, M, N, A, ...);
77
+ // }
78
+ //
79
+ // template <>
80
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
81
+ // cblas_sbacon(trans, M, N, A, ...);
82
+ // return true;
83
+ // }
84
+ //
85
+ // Note that you should do everything in your power here to parse any return values
86
+ // cblas_sbacon may give you. We're not trying very hard in this example, but you might
87
+ // look at getrf to see how it might be done.
88
+ //
89
+ // 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
90
+ // of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
91
+ // and math_lapacke.cpp
92
+ //
93
+ // 5. Expose the function in nm_math_init_blas(), in math.cpp:
94
+ //
95
+ // rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
96
+ //
97
+ // Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
98
+ // to the plugin gems.
99
+ //
100
+ // Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
101
+ //
102
+ // 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
103
+ // sanity checks we left out in step 2.
104
+ //
105
+ // 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
106
+ //
107
+ // 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
108
+ // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
109
+ // inserts a lot of unnecessary stuff.
110
+ //
111
+ // Copy and paste the output into the default templated function you wrote in Step 1.
112
+ // Fix it so it works as a template instead of just for doubles.
113
+ //
114
+ // Because of step 3, this will automatically also work for the nmatrix-atlas
115
+ // and nmatrix-lapacke implementations.
116
+ //
117
+ // 9. Write tests to confirm that it works for all data types.
118
+ //
119
+ // 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
120
+ // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
121
+ //
122
+ // 11. Pull request!
123
+
124
+ /*
125
+ * Project Includes
126
+ */
127
+
128
+
129
+ #include <ruby.h>
130
+ #include <algorithm>
131
+ #include <limits>
132
+ #include <cmath>
133
+
134
+ #include "math/cblas_enums.h"
135
+
136
+ #include "data/data.h"
137
+ #include "math/magnitude.h"
138
+ #include "math/imax.h"
139
+ #include "math/scal.h"
140
+ #include "math/laswp.h"
141
+ #include "math/trsm.h"
142
+ #include "math/gemm.h"
143
+ #include "math/gemv.h"
144
+ #include "math/asum.h"
145
+ #include "math/nrm2.h"
146
+ #include "math/getrf.h"
147
+ #include "math/getrs.h"
148
+ #include "math/rot.h"
149
+ #include "math/rotg.h"
150
+ #include "math/math.h"
151
+ #include "math/util.h"
152
+ #include "storage/dense/dense.h"
153
+
154
+ #include "nmatrix.h"
155
+ #include "ruby_constants.h"
156
+
157
+ /*
158
+ * Forward Declarations
159
+ */
160
+
161
+ extern "C" {
162
+ /* BLAS Level 1. */
163
+ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
164
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
165
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
166
+ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
167
+ static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
168
+ static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx);
169
+
170
+ /* BLAS Level 2. */
171
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
172
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
173
+
174
+ /* BLAS Level 3. */
175
+ static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
176
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
177
+ static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
178
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
179
+
180
+ /* LAPACK. */
181
+ static VALUE nm_has_clapack(VALUE self);
182
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
183
+ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
184
+ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
185
+ } // end of extern "C" block
186
+
187
+ ////////////////////
188
+ // Math Functions //
189
+ ////////////////////
190
+
191
+ namespace nm {
192
+ namespace math {
193
+
194
/*
 * Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
 *
 * A is interpreted as row-major with leading dimension lda, i.e. element (i,j) is A[i*lda + j].
 * The determinant is written through result_arg (interpreted as a DType*).
 * Raises ArgumentError for M < 2 and NotImplementedError for M > 3.
 */
template <typename DType>
void det_exact_from_dense(const int M, const void* A_elements, const int lda, void* result_arg) {
  DType* result = reinterpret_cast<DType*>(result_arg);
  const DType* A = reinterpret_cast<const DType*>(A_elements);

  // Wider accumulator type (see long_dtype.h) to limit overflow/precision loss
  // in the intermediate 2x2 minors.
  typename LongDType<DType>::type x, y;

  if (M == 2) {
    *result = A[0] * A[lda+1] - A[1] * A[lda];
  } else if (M == 3) {
    // Cofactor expansion along the first row of [[a b c] [d e f] [g h i]]:
    // det = a(ei-fh) - b(di-fg) + c(dh-eg)
    x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
    y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // di - fg
    x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(di-fg)

    y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
    *result = A[2]*y + x; // c*(dh-eg) + the partial sum above
  } else if (M < 2) {
    rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
  } else {
    rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
  }
}
219
+
220
// We can't do det_exact on byte (uint8_t): the result would have to be returned as a
// byte as well, but a determinant can be negative even when every element of the
// matrix is non-negative. So this specialization always raises.
template <>
void det_exact_from_dense<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
  rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
}
225
/*
 * Calculate the determinant for a yale matrix (storage) of size 2 or 3. Return the result.
 *
 * Yale layout assumptions (see yale.h): a[0..shape[0]-1] holds the diagonal,
 * ija[i]..ija[i+1]-1 index the stored off-diagonal entries of row i, and the first
 * off-diagonal entry lives at index shape[0]+1 in both a and ija.
 * The determinant is written through result_arg (interpreted as a DType*).
 */
template <typename DType>
void det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, void* result_arg) {
  DType* result = reinterpret_cast<DType*>(result_arg);
  IType* ija = reinterpret_cast<IType *>(storage->ija);
  DType* a = reinterpret_cast<DType*>(storage->a);
  // col_pos walks the stored off-diagonal region, which begins at shape[0]+1.
  IType col_pos = storage->shape[0] + 1;
  if (M == 2) {
    // ija[2]-ija[0] is the number of stored off-diagonal entries. Only when both
    // off-diagonals are present does the a01*a10 term contribute; with 0 or 1 stored
    // off-diagonals the product of off-diagonals is zero.
    if (ija[2] - ija[0] == 2) {
      *result = a[0] * a[1] - a[col_pos] * a[col_pos+1];
    }
    else { *result = a[0] * a[1]; }
  } else if (M == 3) {
    // Expand the row into a dense 3x3 scratch matrix m, then apply the rule of Sarrus.
    DType m[3][3];
    for (int i = 0; i < 3; ++i) {
      m[i][i] = a[i]; // diagonal is stored separately in Yale
      switch(ija[i+1] - ija[i]) { // number of stored off-diagonals in row i
        case 2:
          m[i][ija[col_pos]] = a[col_pos];
          m[i][ija[col_pos+1]] = a[col_pos+1];
          col_pos += 2;
          break;
        case 1:
          // zero the two off-diagonal slots, then fill the single stored one
          m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
          m[i][ija[col_pos]] = a[col_pos];
          ++col_pos;
          break;
        case 0:
          m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
          break;
        default:
          rb_raise(rb_eArgError, "some value in IJA is incorrect!");
      }
    }
    // Rule of Sarrus on the densified matrix.
    *result =
      m[0][0] * m[1][1] * m[2][2] + m[0][1] * m[1][2] * m[2][0] + m[0][2] * m[1][0] * m[2][1]
      - m[0][0] * m[1][2] * m[2][1] - m[0][1] * m[1][0] * m[2][2] - m[0][2] * m[1][1] * m[2][0];

  } else if (M < 2) {
    rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
  } else {
    rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
  }
}
271
+
272
/*
 * Solve a system of linear equations using forward-substitution followed by
 * back substitution from the LU factorization of the matrix of co-efficients.
 * Replaces x_elements with the result. Works only with non-integer, non-object
 * data types.
 *
 * args - r -> The number of rows of the matrix.
 *        lu_elements -> Elements of the LU decomposition of the co-efficients
 *                       matrix, as a contiguous array.
 *        b_elements -> Elements of the right hand sides, as a contiguous array.
 *        x_elements -> The array that will contain the results of the computation.
 *        pivot -> Positions of permuted rows.
 *
 * This follows the classic LU back-substitution scheme (cf. Numerical Recipes'
 * lubksb): `ii` is the index (plus one) of the first nonzero element of b, so
 * leading zeros in b skip the inner forward-substitution loop.
 */
template <typename DType>
void solve(const int r, const void* lu_elements, const void* b_elements, void* x_elements, const int* pivot) {
  int ii = 0, ip;
  DType sum;

  const DType* matrix = reinterpret_cast<const DType*>(lu_elements);
  const DType* b = reinterpret_cast<const DType*>(b_elements);
  DType* x = reinterpret_cast<DType*>(x_elements);

  // Start from the (permuted) right-hand side.
  for (int i = 0; i < r; ++i) { x[i] = b[i]; }
  for (int i = 0; i < r; ++i) { // forward substitution loop
    ip = pivot[i];      // un-permute row i as recorded during factorization
    sum = x[ip];
    x[ip] = x[i];

    if (ii != 0) {
      // subtract the already-solved components (L is unit lower triangular)
      for (int j = ii - 1;j < i; ++j) { sum = sum - matrix[i * r + j] * x[j]; }
    }
    else if (sum != 0.0) {
      // first nonzero element of b encountered; start the inner loop from here on
      ii = i + 1;
    }
    x[i] = sum;
  }

  for (int i = r - 1; i >= 0; --i) { // back substitution loop
    sum = x[i];
    for (int j = i + 1; j < r; j++) { sum = sum - matrix[i * r + j] * x[j]; }
    // Divide by the diagonal of U. Assumes a non-singular factorization
    // (zero pivots are the caller's responsibility).
    x[i] = sum/matrix[i * r + i];
  }
}
315
+
316
/*
 * Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique
 * with partial (row) pivoting. In-place inversion of the matrix saves on memory and time.
 *
 * args - M - Shape of the matrix
 *        a_elements - A duplicate of the original expressed as a contiguous array
 *
 * Raises ZeroDivError if a zero pivot is encountered (singular matrix).
 *
 * NOTE(review): the VLAs below are a compiler extension, not standard C++. They are
 * hard to replace safely here because rb_raise() performs a non-local exit, which
 * would leak heap-allocated scratch arrays — confirm before restructuring.
 */
template <typename DType>
void inverse(const int M, void* a_elements) {
  DType* matrix = reinterpret_cast<DType*>(a_elements);
  int row_index[M]; // arrays for keeping track of column scrambling
  int col_index[M];

  for (int k = 0;k < M; ++k) {
    // Find the row at or below k with the largest magnitude in column k (partial pivot).
    typename MagnitudeDType<DType>::type akk;
    akk = magnitude( matrix[k * (M + 1)] ); // diagonal element

    int interchange = k;

    for (int row = k + 1; row < M; ++row) {
      typename MagnitudeDType<DType>::type big;
      big = magnitude( matrix[M*row + k] ); // element below the temp pivot

      if ( big > akk ) {
        interchange = row;
        akk = big;
      }
    }

    if (interchange != k) { // check if rows need flipping
      DType temp;

      for (int col = 0; col < M; ++col) {
        NM_SWAP(matrix[interchange*M + col], matrix[k*M + col], temp);
      }
    }

    // Record the interchange so the column order can be restored afterwards.
    row_index[k] = interchange;
    col_index[k] = k;

    if (matrix[k * (M + 1)] == (DType)(0)) {
      rb_raise(rb_eZeroDivError, "Expected Non-Singular Matrix.");
    }

    DType pivot = matrix[k * (M + 1)];
    matrix[k * (M + 1)] = (DType)(1); // set diagonal as 1 for in-place inversion

    for (int col = 0; col < M; ++col) {
      // divide each element in the kth row with the pivot
      matrix[k*M + col] = matrix[k*M + col] / pivot;
    }

    for (int kk = 0; kk < M; ++kk) { // iterate and reduce all rows
      if (kk == k) continue;

      DType dum = matrix[k + M*kk];
      matrix[k + M*kk] = (DType)(0); // prepare for inplace inversion
      for (int col = 0; col < M; ++col) {
        matrix[M*kk + col] = matrix[M*kk + col] - matrix[M*k + col] * dum;
      }
    }
  }

  // Unscramble columns: undo the recorded row interchanges as column swaps, in reverse.
  DType temp;

  for (int k = M - 1; k >= 0; --k) {
    if (row_index[k] != col_index[k]) {

      for (int row = 0; row < M; ++row) {
        NM_SWAP(matrix[row * M + row_index[k]], matrix[row * M + col_index[k]],
          temp);
      }
    }
  }
}
392
+
393
/*
 * Reduce a square matrix to hessenberg form with householder transforms.
 *
 * == Arguments
 *
 * nrows - The number of rows present in matrix a.
 * a_elements - Elements of the matrix to be reduced in 1D (row-major) array form,
 *              modified in place.
 *
 * == References
 *
 * http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
 * This code has been included by permission of the author.
 */
template <typename DType>
void hessenberg(const int nrows, void* a_elements) {
  DType* a = reinterpret_cast<DType*>(a_elements);
  DType* u = new DType[nrows]; // auxiliary storage for the chosen Householder vector
  DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
  int i, k, col;

  // For each column use a Householder transformation to zero all entries
  // below the subdiagonal. psubdiag walks the subdiagonal: a[(col+1)*nrows + col].
  for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1,
    col++) {
    // Calculate the signed square root of the sum of squares of the
    // elements below the diagonal (p_a steps down column `col`, one row at a time).

    for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows;
      p_a += nrows, i++) {
      sum_of_squares += *p_a * *p_a;
    }
    // Column already zero below the subdiagonal: nothing to eliminate.
    if (sum_of_squares == 0.0) { continue; }
    sum_of_squares = std::sqrt(sum_of_squares);

    // Choose the sign opposite to *psubdiag to avoid cancellation.
    if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }

    // Calculate the Householder transformation Q = I - 2uu'/u'u.
    u[col + 1] = *psubdiag - sum_of_squares;
    *psubdiag = sum_of_squares;

    // Copy the rest of the column into u and zero it in A.
    for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
      u[i] = *p_a;
      *p_a = 0.0;
    }

    // Premultiply A by Q
    scale = -1.0 / (sum_of_squares * u[col+1]);
    for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
      p_a = a + nrows * (col + 1) + i;
      for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
        innerproduct += u[k] * *p_a;
      }
      innerproduct *= scale;
      for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
        *p_a -= u[k] * innerproduct;
      }
    }

    // Postmultiply QA by Q
    for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
      for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
        innerproduct += u[k] * *(p_row + k);
      }
      innerproduct *= scale;

      for (k = col + 1; k < nrows; k++) {
        *(p_row + k) -= u[k] * innerproduct;
      }
    }
  }

  delete[] u;
}
466
+
467
/*
 * Raise NMatrix::NotInvertibleError. Shared by the exact-inverse routines below
 * when the computed determinant is exactly zero. Does not return (rb_raise
 * performs a non-local exit).
 */
void raise_not_invertible_error() {
  rb_raise(nm_eNotInvertibleError,
    "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
}
471
+
472
+ /*
473
+ * Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
474
+ */
475
+ template <typename DType>
476
+ void inverse_exact_from_dense(const int M, const void* A_elements,
477
+ const int lda, void* B_elements, const int ldb) {
478
+
479
+ const DType* A = reinterpret_cast<const DType*>(A_elements);
480
+ DType* B = reinterpret_cast<DType*>(B_elements);
481
+
482
+ if (M == 2) {
483
+ DType det = A[0] * A[lda+1] - A[1] * A[lda];
484
+ if (det == 0) { raise_not_invertible_error(); }
485
+ B[0] = A[lda+1] / det;
486
+ B[1] = -A[1] / det;
487
+ B[ldb] = -A[lda] / det;
488
+ B[ldb+1] = A[0] / det;
489
+
490
+ } else if (M == 3) {
491
+ // Calculate the exact determinant.
492
+ DType det;
493
+ det_exact_from_dense<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
494
+ if (det == 0) { raise_not_invertible_error(); }
495
+
496
+ B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
497
+ B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
498
+ B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
499
+ B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
500
+ B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
501
+ B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
502
+ B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
503
+ B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
504
+ B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
505
+ } else if (M == 1) {
506
+ B[0] = 1 / A[0];
507
+ } else {
508
+ rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
509
+ }
510
+ }
511
+
512
+ template <typename DType>
513
+ void inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
514
+ const int lda, YALE_STORAGE* inverse, const int ldb) {
515
+
516
+ // inverse is a clone of storage
517
+ const DType* a = reinterpret_cast<const DType*>(storage->a);
518
+ const IType* ija = reinterpret_cast<const IType *>(storage->ija);
519
+ DType* b = reinterpret_cast<DType*>(inverse->a);
520
+ IType* ijb = reinterpret_cast<IType *>(inverse->ija);
521
+ IType col_pos = storage->shape[0] + 1;
522
+ // Calculate the exact determinant.
523
+ DType det;
524
+
525
+ if (M == 2) {
526
+ IType ndnz = ija[2] - ija[0];
527
+ if (ndnz == 2) {
528
+ det = a[0] * a[1] - a[col_pos] * a[col_pos+1];
529
+ }
530
+ else { det = a[0] * a[1]; }
531
+ if (det == 0) { raise_not_invertible_error(); }
532
+ b[0] = a[1] / det;
533
+ b[1] = a[0] / det;
534
+ if (ndnz == 2) {
535
+ b[col_pos] = -a[col_pos] / det;
536
+ b[col_pos+1] = -a[col_pos+1] / det;
537
+ }
538
+ else if (ndnz == 1) {
539
+ b[col_pos] = -a[col_pos] / det;
540
+ }
541
+
542
+ } else if (M == 3) {
543
+ DType *A = new DType[lda*3];
544
+ for (int i = 0; i < lda; ++i) {
545
+ A[i*3+i] = a[i];
546
+ switch (ija[i+1] - ija[i]) {
547
+ case 2:
548
+ A[i*3 + ija[col_pos]] = a[col_pos];
549
+ A[i*3 + ija[col_pos+1]] = a[col_pos+1];
550
+ col_pos += 2;
551
+ break;
552
+ case 1:
553
+ A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
554
+ A[i*3 + ija[col_pos]] = a[col_pos];
555
+ col_pos += 1;
556
+ break;
557
+ case 0:
558
+ A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
559
+ break;
560
+ default:
561
+ rb_raise(rb_eArgError, "some value in IJA is incorrect!");
562
+ }
563
+ }
564
+ det =
565
+ A[0] * A[lda+1] * A[2*lda+2] + A[1] * A[lda+2] * A[2*lda] + A[2] * A[lda] * A[2*lda+1]
566
+ - A[0] * A[lda+2] * A[2*lda+1] - A[1] * A[lda] * A[2*lda+2] - A[2] * A[lda+1] * A[2*lda];
567
+ if (det == 0) { raise_not_invertible_error(); }
568
+
569
+ DType *B = new DType[3*ldb];
570
+ B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
571
+ B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
572
+ B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
573
+ B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
574
+ B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
575
+ B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
576
+ B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
577
+ B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
578
+ B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
579
+
580
+ // Calculate the size of ijb and b, then reallocate them.
581
+ IType ndnz = 0;
582
+ for (int i = 0; i < 3; ++i) {
583
+ for (int j = 0; j < 3; ++j) {
584
+ if (j != i && B[i*ldb + j] != 0) { ++ndnz; }
585
+ }
586
+ }
587
+ inverse->ndnz = ndnz;
588
+ col_pos = 4; // shape[0] + 1
589
+ inverse->capacity = 4 + ndnz;
590
+ NM_REALLOC_N(inverse->a, DType, 4 + ndnz);
591
+ NM_REALLOC_N(inverse->ija, IType, 4 + ndnz);
592
+ b = reinterpret_cast<DType*>(inverse->a);
593
+ ijb = reinterpret_cast<IType *>(inverse->ija);
594
+
595
+ for (int i = 0; i < 3; ++i) {
596
+ ijb[i] = col_pos;
597
+ for (int j = 0; j < 3; ++j) {
598
+ if (j == i) {
599
+ b[i] = B[i*ldb + j];
600
+ }
601
+ else if (B[i*ldb + j] != 0) {
602
+ b[col_pos] = B[i*ldb + j];
603
+ ijb[col_pos] = j;
604
+ ++col_pos;
605
+ }
606
+ }
607
+ }
608
+ b[3] = 0;
609
+ ijb[3] = col_pos;
610
+ delete [] B;
611
+ delete [] A;
612
+ } else if (M == 1) {
613
+ b[0] = 1 / a[0];
614
+ } else {
615
+ rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
616
+ }
617
+ }
618
+
619
+ /*
620
+ * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
621
+ *
622
+ * For documentation: http://www.netlib.org/blas/dgemm.f
623
+ */
624
+ template <typename DType>
625
+ inline static void cblas_gemm(const enum CBLAS_ORDER order,
626
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
627
+ int m, int n, int k,
628
+ void* alpha,
629
+ void* a, int lda,
630
+ void* b, int ldb,
631
+ void* beta,
632
+ void* c, int ldc)
633
+ {
634
+ gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
635
+ reinterpret_cast<DType*>(a), lda,
636
+ reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
637
+ reinterpret_cast<DType*>(c), ldc);
638
+ }
639
+
640
+
641
+ /*
642
+ * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
643
+ *
644
+ * For documentation: http://www.netlib.org/lapack/double/dgetrf.f
645
+ */
646
+ template <typename DType>
647
+ inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans,
648
+ const int m, const int n,
649
+ const void* alpha,
650
+ const void* a, const int lda,
651
+ const void* x, const int incx,
652
+ const void* beta,
653
+ void* y, const int incy)
654
+ {
655
+ return gemv<DType>(trans,
656
+ m, n, reinterpret_cast<const DType*>(alpha),
657
+ reinterpret_cast<const DType*>(a), lda,
658
+ reinterpret_cast<const DType*>(x), incx, reinterpret_cast<const DType*>(beta),
659
+ reinterpret_cast<DType*>(y), incy);
660
+ }
661
+
662
+
663
+ /*
664
+ * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
665
+ *
666
+ * For documentation: http://www.netlib.org/blas/dtrsm.f
667
+ */
668
+ template <typename DType>
669
+ inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
670
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
671
+ const int m, const int n, const void* alpha, const void* a,
672
+ const int lda, void* b, const int ldb)
673
+ {
674
+ trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
675
+ reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
676
+ }
677
+
678
+ }
679
+ } // end of namespace nm::math
680
+
681
+
682
+ extern "C" {
683
+
684
+ ///////////////////
685
+ // Ruby Bindings //
686
+ ///////////////////
687
+
688
/*
 * Register the internally-implemented BLAS/LAPACK bindings on the Ruby side,
 * under NMatrix::Internal::BLAS and NMatrix::Internal::LAPACK. The final integer
 * of each rb_define_singleton_method call is the Ruby-visible arity.
 */
void nm_math_init_blas() {
  VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");

  rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);

  VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");

  /* ATLAS-CLAPACK Functions that are implemented internally */
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);

  VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");

  /* BLAS level 1 */
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);

  /* BLAS levels 2 and 3 */
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
}
713
+
714
+ /*
715
+ * call-seq:
716
+ * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
717
+ *
718
+ * BLAS level 1 function +scal+. Works with all dtypes.
719
+ *
720
+ * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
721
+ * follows:
722
+ * x <- alpha * x
723
+ *
724
+ * - +n+ -> Number of elements of +vector+.
725
+ * - +alpha+ -> Scalar value used in the operation.
726
+ * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
727
+ * - +inc+ -> Increment used in the scaling function. Should generally be 1.
728
+ */
729
+ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
730
+ nm::dtype_t dtype = NM_DTYPE(vector);
731
+
732
+ void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
733
+ rubyval_to_cval(alpha, dtype, scalar);
734
+
735
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_scal, void, const int n,
736
+ const void* scalar, void* x, const int incx);
737
+
738
+ ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements,
739
+ FIX2INT(incx));
740
+
741
+ return vector;
742
+ }
743
+
744
/*
 * Call any of the cblas_xrotg functions as directly as possible.
 *
 * xROTG computes the elements of a Givens plane rotation matrix such that:
 *
 *  | c s |   | a |   | r |
 *  | -s c | * | b | = | 0 |
 *
 * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
 *
 * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
 *
 * This function differs from most of the other raw BLAS accessors. Instead of
 * providing a, b, c, s as arguments, you should only provide a and b (the
 * inputs), and you should provide them as the first two elements of any dense
 * NMatrix type.
 *
 * The outputs [c,s] will be returned in a Ruby Array at the end; the input
 * NMatrix will also be modified in-place.
 *
 * This function, like the other cblas_ functions, does minimal type-checking.
 */
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
  // Dispatch table indexed by dtype. Only floating-point and complex dtypes
  // are supported; NULL entries trigger the rb_raise below.
  static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
    NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
    nm::math::cblas_rotg<float>,
    nm::math::cblas_rotg<double>,
    nm::math::cblas_rotg<nm::Complex64>,
    nm::math::cblas_rotg<nm::Complex128>,
    NULL //nm::math::cblas_rotg<nm::RubyObject>
  };

  nm::dtype_t dtype = NM_DTYPE(ab);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil;

  } else {
    // Protect self and ab from GC while we hold raw pointers into ab's storage.
    NM_CONSERVATIVE(nm_register_value(&self));
    NM_CONSERVATIVE(nm_register_value(&ab));
    // Stack buffers for the two outputs, sized by dtype.
    void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
         *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);

    // extract A and B from the NVector (first two elements)
    void* pA = NM_STORAGE_DENSE(ab)->elements;
    void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
    // c and s are output

    ttable[dtype](pA, pB, pC, pS);

    VALUE result = rb_ary_new2(2);

    // NOTE(review): the RUBYOBJ table entry above is NULL, so this branch
    // appears unreachable unless that entry is re-enabled — kept for safety.
    if (dtype == nm::RUBYOBJ) {
      rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
      rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
    } else {
      rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
      rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
    }
    // Unregister in reverse order of registration.
    NM_CONSERVATIVE(nm_unregister_value(&ab));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    return result;
  }
}
809
+
810
+
811
+ /*
812
+ * Call any of the cblas_xrot functions as directly as possible.
813
+ *
814
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
815
+ *
816
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
817
+ * * n :: number of elements to consider in x and y
818
+ * * x :: a vector (expects an NVector)
819
+ * * incx :: stride of x
820
+ * * y :: a vector (expects an NVector)
821
+ * * incy :: stride of y
822
+ * * c :: cosine of the angle of rotation
823
+ * * s :: sine of the angle of rotation
824
+ *
825
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
826
+ * will be float for Complex64 or double for Complex128.
827
+ *
828
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
829
+ * with its arguments?
830
+ *
831
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
832
+ * handling, so you can easily crash Ruby!
833
+ */
834
+ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
835
+ static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
836
+ NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
837
+ nm::math::cblas_rot<float,float>,
838
+ nm::math::cblas_rot<double,double>,
839
+ nm::math::cblas_rot<nm::Complex64,float>,
840
+ nm::math::cblas_rot<nm::Complex128,double>,
841
+ nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
842
+ };
843
+
844
+ nm::dtype_t dtype = NM_DTYPE(x);
845
+
846
+
847
+ if (!ttable[dtype]) {
848
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
849
+ return Qfalse;
850
+ } else {
851
+ void *pC, *pS;
852
+
853
+ // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
854
+ if (dtype == nm::COMPLEX64) {
855
+ pC = NM_ALLOCA_N(float,1);
856
+ pS = NM_ALLOCA_N(float,1);
857
+ rubyval_to_cval(c, nm::FLOAT32, pC);
858
+ rubyval_to_cval(s, nm::FLOAT32, pS);
859
+ } else if (dtype == nm::COMPLEX128) {
860
+ pC = NM_ALLOCA_N(double,1);
861
+ pS = NM_ALLOCA_N(double,1);
862
+ rubyval_to_cval(c, nm::FLOAT64, pC);
863
+ rubyval_to_cval(s, nm::FLOAT64, pS);
864
+ } else {
865
+ pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
866
+ pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
867
+ rubyval_to_cval(c, dtype, pC);
868
+ rubyval_to_cval(s, dtype, pS);
869
+ }
870
+
871
+
872
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
873
+
874
+ return Qtrue;
875
+ }
876
+ }
877
+
878
+
879
+ /*
880
+ * Call any of the cblas_xnrm2 functions as directly as possible.
881
+ *
882
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
883
+ *
884
+ * Arguments:
885
+ * * n :: length of x, must be at least 0
886
+ * * x :: pointer to first entry of input vector
887
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
888
+ *
889
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
890
+ * with its arguments?
891
+ *
892
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
893
+ * handling, so you can easily crash Ruby!
894
+ */
895
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
896
+
897
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
898
+ NULL, NULL, NULL, NULL, NULL, // no help for integers
899
+ nm::math::cblas_nrm2<float32_t>,
900
+ nm::math::cblas_nrm2<float64_t>,
901
+ nm::math::cblas_nrm2<nm::Complex64>,
902
+ nm::math::cblas_nrm2<nm::Complex128>,
903
+ nm::math::cblas_nrm2<nm::RubyObject>
904
+ };
905
+
906
+ nm::dtype_t dtype = NM_DTYPE(x);
907
+
908
+ if (!ttable[dtype]) {
909
+ rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
910
+ return Qnil;
911
+
912
+ } else {
913
+ // Determine the return dtype and allocate it
914
+ nm::dtype_t rdtype = dtype;
915
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
916
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
917
+
918
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
919
+
920
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
921
+
922
+ return nm::rubyobj_from_cval(Result, rdtype).rval;
923
+ }
924
+ }
925
+
926
+
927
+
928
+ /*
929
+ * Call any of the cblas_xasum functions as directly as possible.
930
+ *
931
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
932
+ * of a vector x.
933
+ *
934
+ * Arguments:
935
+ * * n :: length of x, must be at least 0
936
+ * * x :: pointer to first entry of input vector
937
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
938
+ *
939
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
940
+ * with its arguments?
941
+ *
942
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
943
+ * handling, so you can easily crash Ruby!
944
+ */
945
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
946
+
947
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
948
+ nm::math::cblas_asum<uint8_t>,
949
+ nm::math::cblas_asum<int8_t>,
950
+ nm::math::cblas_asum<int16_t>,
951
+ nm::math::cblas_asum<int32_t>,
952
+ nm::math::cblas_asum<int64_t>,
953
+ nm::math::cblas_asum<float32_t>,
954
+ nm::math::cblas_asum<float64_t>,
955
+ nm::math::cblas_asum<nm::Complex64>,
956
+ nm::math::cblas_asum<nm::Complex128>,
957
+ nm::math::cblas_asum<nm::RubyObject>
958
+ };
959
+
960
+ nm::dtype_t dtype = NM_DTYPE(x);
961
+
962
+ // Determine the return dtype and allocate it
963
+ nm::dtype_t rdtype = dtype;
964
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
965
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
966
+
967
+ void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
968
+
969
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
970
+
971
+ return nm::rubyobj_from_cval(Result, rdtype).rval;
972
+ }
973
+
974
+ /*
975
+ * call-seq:
976
+ * NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
977
+ *
978
+ * BLAS level 1 routine.
979
+ *
980
+ * Return the index of the largest element of +vector+.
981
+ *
982
+ * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
983
+ * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
984
+ * - +inc+ -> It's the increment used when searching. Use 1 except if you know
985
+ * what you're doing.
986
+ */
987
+ static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
988
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_imax, int, const int n, const void* x, const int incx);
989
+
990
+ nm::dtype_t dtype = NM_DTYPE(x);
991
+
992
+ int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));
993
+
994
+ // Convert to Ruby's Int value.
995
+ return INT2FIX(index);
996
+ }
997
+
998
+
999
+ /* Call any of the cblas_xgemm functions as directly as possible.
1000
+ *
1001
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
1002
+ *
1003
+ * C = alpha*op(A)*op(B) + beta*C
1004
+ *
1005
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
1006
+ *
1007
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
1008
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
1009
+ * expose the ultra-optimized ATLAS versions.
1010
+ *
1011
+ * == Arguments
1012
+ * See: http://www.netlib.org/blas/dgemm.f
1013
+ *
1014
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
1015
+ * with its arguments?
1016
+ *
1017
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1018
+ * handling, so you can easily crash Ruby!
1019
+ */
1020
+ static VALUE nm_cblas_gemm(VALUE self,
1021
+ VALUE order,
1022
+ VALUE trans_a, VALUE trans_b,
1023
+ VALUE m, VALUE n, VALUE k,
1024
+ VALUE alpha,
1025
+ VALUE a, VALUE lda,
1026
+ VALUE b, VALUE ldb,
1027
+ VALUE beta,
1028
+ VALUE c, VALUE ldc)
1029
+ {
1030
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
1031
+
1032
+ nm::dtype_t dtype = NM_DTYPE(a);
1033
+
1034
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
1035
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1036
+ rubyval_to_cval(alpha, dtype, pAlpha);
1037
+ rubyval_to_cval(beta, dtype, pBeta);
1038
+
1039
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
1040
+
1041
+ return c;
1042
+ }
1043
+
1044
+
1045
+ /* Call any of the cblas_xgemv functions as directly as possible.
1046
+ *
1047
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
1048
+ *
1049
+ * y = alpha*op(A)*x + beta*y
1050
+ *
1051
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
1052
+ *
1053
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
1054
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
1055
+ * expose the ultra-optimized ATLAS versions.
1056
+ *
1057
+ * == Arguments
1058
+ * See: http://www.netlib.org/blas/dgemm.f
1059
+ *
1060
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
1061
+ * with its arguments?
1062
+ *
1063
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1064
+ * handling, so you can easily crash Ruby!
1065
+ */
1066
+ static VALUE nm_cblas_gemv(VALUE self,
1067
+ VALUE trans_a,
1068
+ VALUE m, VALUE n,
1069
+ VALUE alpha,
1070
+ VALUE a, VALUE lda,
1071
+ VALUE x, VALUE incx,
1072
+ VALUE beta,
1073
+ VALUE y, VALUE incy)
1074
+ {
1075
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)
1076
+
1077
+ nm::dtype_t dtype = NM_DTYPE(a);
1078
+
1079
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
1080
+ *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1081
+ rubyval_to_cval(alpha, dtype, pAlpha);
1082
+ rubyval_to_cval(beta, dtype, pBeta);
1083
+
1084
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
1085
+ }
1086
+
1087
+
1088
+ static VALUE nm_cblas_trsm(VALUE self,
1089
+ VALUE order,
1090
+ VALUE side, VALUE uplo,
1091
+ VALUE trans_a, VALUE diag,
1092
+ VALUE m, VALUE n,
1093
+ VALUE alpha,
1094
+ VALUE a, VALUE lda,
1095
+ VALUE b, VALUE ldb)
1096
+ {
1097
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
1098
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
1099
+ const int m, const int n, const void* alpha, const void* a,
1100
+ const int lda, void* b, const int ldb) = {
1101
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1102
+ nm::math::cblas_trsm<float>,
1103
+ nm::math::cblas_trsm<double>,
1104
+ nm::math::cblas_trsm<nm::Complex64>,
1105
+ nm::math::cblas_trsm<nm::Complex128>,
1106
+ nm::math::cblas_trsm<nm::RubyObject>
1107
+ };
1108
+
1109
+ nm::dtype_t dtype = NM_DTYPE(a);
1110
+
1111
+ if (!ttable[dtype]) {
1112
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1113
+ } else {
1114
+ void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
1115
+ rubyval_to_cval(alpha, dtype, pAlpha);
1116
+
1117
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1118
+ }
1119
+
1120
+ return Qtrue;
1121
+ }
1122
+
1123
+ /* Call any of the clapack_xgetrf functions as directly as possible.
1124
+ *
1125
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
1126
+ * matrix A using partial pivoting with row interchanges.
1127
+ *
1128
+ * The factorization has the form:
1129
+ * A = P * L * U
1130
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
1131
+ * and U is upper triangular (upper trapezoidal if m < n).
1132
+ *
1133
+ * This is the right-looking level 3 BLAS version of the algorithm.
1134
+ *
1135
+ * == Arguments
1136
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
1137
+ * (You don't need argument 5; this is the value returned by this function.)
1138
+ *
1139
+ * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
1140
+ * with its arguments?
1141
+ *
1142
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1143
+ * handling, so you can easily crash Ruby!
1144
+ *
1145
+ * Returns an array giving the pivot indices (normally these are argument #5).
1146
+ */
1147
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
1148
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
1149
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1150
+ nm::math::clapack_getrf<float>,
1151
+ nm::math::clapack_getrf<double>,
1152
+ nm::math::clapack_getrf<nm::Complex64>,
1153
+ nm::math::clapack_getrf<nm::Complex128>,
1154
+ nm::math::clapack_getrf<nm::RubyObject>
1155
+ };
1156
+
1157
+ int M = FIX2INT(m),
1158
+ N = FIX2INT(n);
1159
+
1160
+ // Allocate the pivot index array, which is of size MIN(M, N).
1161
+ size_t ipiv_size = std::min(M,N);
1162
+ int* ipiv = NM_ALLOCA_N(int, ipiv_size);
1163
+
1164
+ if (!ttable[NM_DTYPE(a)]) {
1165
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1166
+ } else {
1167
+ // Call either our version of getrf or the LAPACK version.
1168
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
1169
+ }
1170
+
1171
+ // Result will be stored in a. We return ipiv as an array.
1172
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
1173
+ for (size_t i = 0; i < ipiv_size; ++i) {
1174
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
1175
+ }
1176
+
1177
+ return ipiv_array;
1178
+ }
1179
+
1180
+
1181
+ /*
1182
+ * Call any of the clapack_xgetrs functions as directly as possible.
1183
+ */
1184
+ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
1185
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
1186
+ const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
1187
+ const int ldb) = {
1188
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
1189
+ nm::math::clapack_getrs<float>,
1190
+ nm::math::clapack_getrs<double>,
1191
+ nm::math::clapack_getrs<nm::Complex64>,
1192
+ nm::math::clapack_getrs<nm::Complex128>,
1193
+ nm::math::clapack_getrs<nm::RubyObject>
1194
+ };
1195
+
1196
+ // Allocate the C version of the pivot index array
1197
+ int* ipiv_;
1198
+ if (!RB_TYPE_P(ipiv, T_ARRAY)) {
1199
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1200
+ } else {
1201
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1202
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1203
+ ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
1204
+ }
1205
+ }
1206
+
1207
+ if (!ttable[NM_DTYPE(a)]) {
1208
+ rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
1209
+ } else {
1210
+
1211
+ // Call either our version of getrs or the LAPACK version.
1212
+ ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
1213
+ ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
1214
+ }
1215
+
1216
+ // b is both returned and modified directly in the argument list.
1217
+ return b;
1218
+ }
1219
+
1220
+ /*
1221
+ * Simple way to check from within Ruby code if clapack functions are available, without
1222
+ * having to wait around for an exception to be thrown.
1223
+ */
1224
+ static VALUE nm_has_clapack(VALUE self) {
1225
+ return Qfalse;
1226
+ }
1227
+
1228
+ /*
1229
+ * Call any of the clapack_xlaswp functions as directly as possible.
1230
+ *
1231
+ * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
1232
+ * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
1233
+ */
1234
+ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
1235
+ static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
1236
+ nm::math::clapack_laswp<uint8_t>,
1237
+ nm::math::clapack_laswp<int8_t>,
1238
+ nm::math::clapack_laswp<int16_t>,
1239
+ nm::math::clapack_laswp<int32_t>,
1240
+ nm::math::clapack_laswp<int64_t>,
1241
+ nm::math::clapack_laswp<float>,
1242
+ nm::math::clapack_laswp<double>,
1243
+ nm::math::clapack_laswp<nm::Complex64>,
1244
+ nm::math::clapack_laswp<nm::Complex128>,
1245
+ nm::math::clapack_laswp<nm::RubyObject>
1246
+ };
1247
+
1248
+ // Allocate the C version of the pivot index array
1249
+ int* ipiv_;
1250
+ if (!RB_TYPE_P(ipiv, T_ARRAY)) {
1251
+ rb_raise(rb_eArgError, "ipiv must be of type Array");
1252
+ } else {
1253
+ ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
1254
+ for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
1255
+ ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
1256
+ }
1257
+ }
1258
+
1259
+ // Call either our version of laswp or the LAPACK version.
1260
+ ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
1261
+
1262
+ // a is both returned and modified directly in the argument list.
1263
+ return a;
1264
+ }
1265
+
1266
+
1267
/*
 * C accessor for calculating an exact determinant. Dense matrix version.
 *
 * - +M+        -> matrix dimension (assumes square — TODO confirm with callers).
 * - +elements+ -> dense element array of the matrix.
 * - +lda+      -> leading dimension of the element array.
 * - +dtype+    -> dtype used to pick the templated implementation.
 * - +result+   -> out-parameter receiving the determinant (dtype-sized buffer).
 */
void nm_math_det_exact_from_dense(const int M, const void* elements, const int lda,
                                  nm::dtype_t dtype, void* result) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_dense, void, const int M,
                             const void* A_elements, const int lda, void* result_arg);

  ttable[dtype](M, elements, lda, result);
}
1277
+
1278
/*
 * C accessor for calculating an exact determinant. Yale matrix version.
 *
 * - +M+       -> matrix dimension (assumes square — TODO confirm with callers).
 * - +storage+ -> Yale (sparse) storage of the matrix.
 * - +lda+     -> leading dimension passed through to the implementation.
 * - +dtype+   -> dtype used to pick the templated implementation.
 * - +result+  -> out-parameter receiving the determinant (dtype-sized buffer).
 */
void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda,
                                 nm::dtype_t dtype, void* result) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_yale, void, const int M,
                             const YALE_STORAGE* storage, const int lda, void* result_arg);

  ttable[dtype](M, storage, lda, result);
}
1288
+
1289
+ /*
1290
+ * C accessor for solving a system of linear equations.
1291
+ */
1292
+ void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv) {
1293
+ int* pivot = new int[RARRAY_LEN(ipiv)];
1294
+
1295
+ for (int i = 0; i < RARRAY_LEN(ipiv); ++i) {
1296
+ pivot[i] = FIX2INT(rb_ary_entry(ipiv, i));
1297
+ }
1298
+
1299
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::solve, void, const int, const void*, const void*, void*, const int*);
1300
+
1301
+ ttable[NM_DTYPE(x)](NM_SHAPE0(b), NM_STORAGE_DENSE(lu)->elements,
1302
+ NM_STORAGE_DENSE(b)->elements, NM_STORAGE_DENSE(x)->elements, pivot);
1303
+ }
1304
+
1305
+ /*
1306
+ * C accessor for reducing a matrix to hessenberg form.
1307
+ */
1308
+ void nm_math_hessenberg(VALUE a) {
1309
+ static void (*ttable[nm::NUM_DTYPES])(const int, void*) = {
1310
+ NULL, NULL, NULL, NULL, NULL, // does not support ints
1311
+ nm::math::hessenberg<float>,
1312
+ nm::math::hessenberg<double>,
1313
+ NULL, NULL, // does not support Complex
1314
+ NULL // no support for Ruby Object
1315
+ };
1316
+
1317
+ ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements);
1318
+ }
1319
/*
 * C accessor for calculating an in-place inverse.
 *
 * - +M+          -> matrix dimension (assumes square — TODO confirm with callers).
 * - +a_elements+ -> dense element array, overwritten with its inverse.
 * - +dtype+      -> dtype used to pick the templated implementation.
 */
void nm_math_inverse(const int M, void* a_elements, nm::dtype_t dtype) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse, void, const int, void*);

  ttable[dtype](M, a_elements);
}
1327
+
1328
/*
 * C accessor for calculating an exact inverse. Dense matrix version.
 *
 * - +M+          -> matrix dimension (assumes square — TODO confirm with callers).
 * - +A_elements+ -> input dense element array (unmodified).
 * - +lda+        -> leading dimension of A.
 * - +B_elements+ -> output dense element array receiving the inverse.
 * - +ldb+        -> leading dimension of B.
 * - +dtype+      -> dtype used to pick the templated implementation.
 */
void nm_math_inverse_exact_from_dense(const int M, const void* A_elements,
                                      const int lda, void* B_elements, const int ldb, nm::dtype_t dtype) {

  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_dense, void,
                             const int, const void*, const int, void*, const int);

  ttable[dtype](M, A_elements, lda, B_elements, ldb);
}
1339
+
1340
/*
 * C accessor for calculating an exact inverse. Yale matrix version.
 *
 * - +M+       -> matrix dimension (assumes square — TODO confirm with callers).
 * - +storage+ -> input Yale (sparse) storage (unmodified).
 * - +lda+     -> leading dimension of the input.
 * - +inverse+ -> output Yale storage receiving the inverse.
 * - +ldb+     -> leading dimension of the output.
 * - +dtype+   -> dtype used to pick the templated implementation.
 */
void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
                                     const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype) {

  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_yale, void,
                             const int, const YALE_STORAGE*, const int, YALE_STORAGE*, const int);

  ttable[dtype](M, storage, lda, inverse, ldb);
}
1351
+
1352
/*
 * Transpose an array of elements that represent a row-major dense matrix.
 * Does not allocate anything; only copies bytes with memcpy.
 *
 * - +M+, +N+        -> source dimensions (M rows by N columns).
 * - +A+, +lda+      -> source elements and leading dimension.
 * - +B+, +ldb+      -> destination elements (written as N rows by M columns)
 *                      and leading dimension.
 * - +element_size+  -> size in bytes of one element.
 */
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
  const char* src = reinterpret_cast<const char*>(A);
  char*       dst = reinterpret_cast<char*>(B);

  for (size_t row = 0; row < N; ++row) {
    for (size_t col = 0; col < M; ++col) {
      // B[row][col] = A[col][row], in raw bytes.
      memcpy(dst + (row * ldb + col) * element_size,
             src + (col * lda + row) * element_size,
             element_size);
    }
  }
}
1366
+
1367
+
1368
+ } // end of extern "C" block