nmatrix 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/.gitignore +27 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +3 -5
  4. data/Guardfile +6 -0
  5. data/History.txt +33 -0
  6. data/Manifest.txt +41 -38
  7. data/README.rdoc +88 -11
  8. data/Rakefile +35 -53
  9. data/ext/nmatrix/data/complex.h +372 -0
  10. data/ext/nmatrix/data/data.cpp +275 -0
  11. data/ext/nmatrix/data/data.h +707 -0
  12. data/ext/nmatrix/data/rational.h +421 -0
  13. data/ext/nmatrix/data/ruby_object.h +446 -0
  14. data/ext/nmatrix/extconf.rb +101 -51
  15. data/ext/nmatrix/new_extconf.rb +56 -0
  16. data/ext/nmatrix/nmatrix.cpp +1609 -0
  17. data/ext/nmatrix/nmatrix.h +265 -849
  18. data/ext/nmatrix/ruby_constants.cpp +134 -0
  19. data/ext/nmatrix/ruby_constants.h +103 -0
  20. data/ext/nmatrix/storage/common.cpp +70 -0
  21. data/ext/nmatrix/storage/common.h +170 -0
  22. data/ext/nmatrix/storage/dense.cpp +665 -0
  23. data/ext/nmatrix/storage/dense.h +116 -0
  24. data/ext/nmatrix/storage/list.cpp +1088 -0
  25. data/ext/nmatrix/storage/list.h +129 -0
  26. data/ext/nmatrix/storage/storage.cpp +658 -0
  27. data/ext/nmatrix/storage/storage.h +99 -0
  28. data/ext/nmatrix/storage/yale.cpp +1601 -0
  29. data/ext/nmatrix/storage/yale.h +208 -0
  30. data/ext/nmatrix/ttable_helper.rb +126 -0
  31. data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
  32. data/ext/nmatrix/util/io.cpp +295 -0
  33. data/ext/nmatrix/util/io.h +117 -0
  34. data/ext/nmatrix/util/lapack.h +1175 -0
  35. data/ext/nmatrix/util/math.cpp +557 -0
  36. data/ext/nmatrix/util/math.h +1363 -0
  37. data/ext/nmatrix/util/sl_list.cpp +475 -0
  38. data/ext/nmatrix/util/sl_list.h +255 -0
  39. data/ext/nmatrix/util/util.h +78 -0
  40. data/lib/nmatrix/blas.rb +70 -0
  41. data/lib/nmatrix/io/mat5_reader.rb +567 -0
  42. data/lib/nmatrix/io/mat_reader.rb +162 -0
  43. data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
  44. data/lib/nmatrix/nmatrix.rb +199 -0
  45. data/lib/nmatrix/nvector.rb +103 -0
  46. data/lib/nmatrix/version.rb +27 -0
  47. data/lib/nmatrix.rb +22 -230
  48. data/nmatrix.gemspec +59 -0
  49. data/scripts/mac-brew-gcc.sh +47 -0
  50. data/spec/4x4_sparse.mat +0 -0
  51. data/spec/4x5_dense.mat +0 -0
  52. data/spec/blas_spec.rb +47 -0
  53. data/spec/elementwise_spec.rb +164 -0
  54. data/spec/io_spec.rb +60 -0
  55. data/spec/lapack_spec.rb +52 -0
  56. data/spec/math_spec.rb +96 -0
  57. data/spec/nmatrix_spec.rb +93 -89
  58. data/spec/nmatrix_yale_spec.rb +52 -36
  59. data/spec/nvector_spec.rb +1 -1
  60. data/spec/slice_spec.rb +257 -0
  61. data/spec/spec_helper.rb +51 -0
  62. data/spec/utm5940.mtx +83844 -0
  63. metadata +113 -71
  64. data/.autotest +0 -23
  65. data/.gemtest +0 -0
  66. data/ext/nmatrix/cblas.c +0 -150
  67. data/ext/nmatrix/dense/blas_header.template.c +0 -52
  68. data/ext/nmatrix/dense/elementwise.template.c +0 -107
  69. data/ext/nmatrix/dense/gemm.template.c +0 -159
  70. data/ext/nmatrix/dense/gemv.template.c +0 -130
  71. data/ext/nmatrix/dense/rationalmath.template.c +0 -68
  72. data/ext/nmatrix/dense.c +0 -307
  73. data/ext/nmatrix/depend +0 -18
  74. data/ext/nmatrix/generator/syntax_tree.rb +0 -481
  75. data/ext/nmatrix/generator.rb +0 -594
  76. data/ext/nmatrix/list.c +0 -774
  77. data/ext/nmatrix/nmatrix.c +0 -1977
  78. data/ext/nmatrix/rational.c +0 -98
  79. data/ext/nmatrix/yale/complexmath.template.c +0 -71
  80. data/ext/nmatrix/yale/elementwise.template.c +0 -46
  81. data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
  82. data/ext/nmatrix/yale/numbmm.template.c +0 -94
  83. data/ext/nmatrix/yale/smmp1.template.c +0 -21
  84. data/ext/nmatrix/yale/smmp2.template.c +0 -43
  85. data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
  86. data/ext/nmatrix/yale/sort_columns.template.c +0 -56
  87. data/ext/nmatrix/yale/symbmm.template.c +0 -54
  88. data/ext/nmatrix/yale/transp.template.c +0 -68
  89. data/ext/nmatrix/yale.c +0 -726
  90. data/lib/array.rb +0 -67
  91. data/spec/syntax_tree_spec.rb +0 -46
@@ -0,0 +1,557 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math.cpp
25
+ //
26
+ // Ruby-exposed BLAS functions.
27
+ //
28
+ // === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
29
+ //
30
+ // This procedure is written as if for a fictional function with double
31
+ // version dbacon, which we'll say is from LAPACK.
32
+ //
33
+ // 1. Write a default templated version which probably returns a boolean.
34
+ // Call it bacon, and put it in math.h.
35
+ //
36
+ // Order will always be row-major, so we don't need to pass that.
37
+ // CBLAS_TRANSPOSE-type arguments, however, should be passed.
38
+ //
39
+ // Otherwise, arguments should look like those in cblas.h or clapack.h:
40
+ //
41
+ // template <typename DType>
42
+ // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
43
+ // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
44
+ // }
45
+ //
46
+ // 2. In math.cpp, add a templated inline static version of the function which takes
47
+ // only void* pointers and uses reinterpret_cast to convert them to the
48
+ // proper dtype.
49
+ //
50
+ // This function may also need to switch m and n if these arguments are given.
51
+ //
52
+ // For an example, see cblas_gemm. This function should do nothing other than cast
53
+ // appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
54
+ // all take void* only, and no other pointers that vary between functions, you can skip
55
+ // this particular step -- as we can call them directly using a custom function pointer
56
+ // array (same function signature!).
57
+ //
58
+ // This version of the function will be the one exposed through NMatrix::LAPACK. We
59
+ // want it to be as close to the actual LAPACK version of the function as possible,
60
+ // and with as few checks as possible.
61
+ //
62
+ // You will probably need a forward declaration in the extern "C" block.
63
+ //
64
+ // Note: In that case, the function you wrote in Step 1 should also take exactly the
65
+ // same arguments as clapack_xbacon. Otherwise Bad Things will happen.
66
+ //
67
+ // 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
68
+ //
69
+ // You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
70
+ // Or you may prefer to do it by hand:
71
+ //
72
+ // template <>
73
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
74
+ // clapack_sbacon(trans, M, N, A, ...);
75
+ // return true;
76
+ // }
77
+ //
78
+ // Make sure these functions are in the namespace nm::math.
79
+ //
80
+ // Note that you should do everything in your power here to parse any return values
81
+ // clapack_sbacon may give you. We're not trying very hard in this example, but you might
82
+ // look at getrf to see how it might be done.
83
+ //
84
+ // 4. Expose the function in nm_math_init_blas(), in math.cpp:
85
+ //
86
+ // rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
87
+ //
88
+ // Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
89
+ //
90
+ // 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
91
+ // sanity checks we left out in step 2.
92
+ //
93
+ // 6. Write tests for NMatrix::LAPACK::bacon, confirming that it works for the ATLAS dtypes.
94
+ //
95
+ // 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
96
+ // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
97
+ // inserts a lot of unnecessary stuff.
98
+ //
99
+ // Copy and paste the output into the default templated function you wrote in Step 1.
100
+ // Fix it so it works as a template instead of just for doubles.
101
+ //
102
+ // 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
103
+ //
104
+ // 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
105
+ // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
106
+ //
107
+ // 10. Pull request!
108
+
109
+
110
+
111
+ /*
112
+ * Project Includes
113
+ */
114
+
115
+ #include "math.h"
116
+ #include "lapack.h"
117
+
118
+ #include "nmatrix.h"
119
+ #include "ruby_constants.h"
120
+
121
+ /*
122
+ * Forward Declarations
123
+ */
124
+
125
+ extern "C" {
126
+ #ifdef HAVE_CLAPACK_H
127
+ #include <clapack.h>
128
+ #endif
129
+
130
+ static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
131
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
132
+
133
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
134
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
135
+
136
+ static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
137
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
138
+
139
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
140
+
141
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
142
+
143
+ } // end of extern "C" block
144
+
145
+ ////////////////////
146
+ // Math Functions //
147
+ ////////////////////
148
+
149
+ namespace nm { namespace math {
150
+
151
+ /*
152
+ * Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
153
+ */
154
+ template <typename DType>
155
+ void det_exact(const int M, const void* A_elements, const int lda, void* result_arg) {
156
+ DType* result = reinterpret_cast<DType*>(result_arg);
157
+ const DType* A = reinterpret_cast<const DType*>(A_elements);
158
+
159
+ typename LongDType<DType>::type x, y;
160
+
161
+ if (M == 2) {
162
+ *result = A[0] * A[lda+1] - A[1] * A[lda];
163
+
164
+ } else if (M == 3) {
165
+ x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
166
+ y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
167
+ x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di)
168
+
169
+ y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
170
+ *result = A[2]*y + x; // c*(dh-eg) + _
171
+ } else if (M < 2) {
172
+ rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
173
+ } else {
174
+ rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
175
+ }
176
+ }
177
+
178
+
179
+
180
+
181
+ /*
182
+ * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
183
+ *
184
+ * For documentation: http://www.netlib.org/blas/dgemm.f
185
+ */
186
+ template <typename DType>
187
+ inline static void cblas_gemm(const enum CBLAS_ORDER order,
188
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
189
+ int m, int n, int k,
190
+ void* alpha,
191
+ void* a, int lda,
192
+ void* b, int ldb,
193
+ void* beta,
194
+ void* c, int ldc)
195
+ {
196
+ gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
197
+ reinterpret_cast<DType*>(a), lda,
198
+ reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
199
+ reinterpret_cast<DType*>(c), ldc);
200
+ }
201
+
202
+
203
+ /*
204
+ * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
205
+ *
206
+ * For documentation: http://www.netlib.org/lapack/double/dgetrf.f
207
+ */
208
+ template <typename DType>
209
+ inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans_a,
210
+ int m, int n,
211
+ void* alpha,
212
+ void* a, int lda,
213
+ void* x, int incx,
214
+ void* beta,
215
+ void* y, int incy)
216
+ {
217
+ return gemv<DType>(trans_a,
218
+ m, n, reinterpret_cast<DType*>(alpha),
219
+ reinterpret_cast<DType*>(a), lda,
220
+ reinterpret_cast<DType*>(x), incx, reinterpret_cast<DType*>(beta),
221
+ reinterpret_cast<DType*>(y), incy);
222
+ }
223
+
224
+
225
+ /*
226
+ * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
227
+ *
228
+ * For documentation: http://www.netlib.org/blas/dtrsm.f
229
+ */
230
+ template <typename DType>
231
+ inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
232
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
233
+ const int m, const int n, const void* alpha, const void* a,
234
+ const int lda, void* b, const int ldb)
235
+ {
236
+ trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
237
+ reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
238
+ }
239
+
240
+
241
+
242
+ }} // end of namespace nm::math
243
+
244
+
245
+ extern "C" {
246
+
247
+ ///////////////////
248
+ // Ruby Bindings //
249
+ ///////////////////
250
+
251
+ void nm_math_init_blas() {
252
+ cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
253
+
254
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
255
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
256
+
257
+ cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
258
+
259
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
260
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
261
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
262
+ }
263
+
264
+
265
+ /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
266
+ * into an enum recognized by cblas.
267
+ *
268
+ * Called by nm_cblas_gemm -- basically inline.
269
+ *
270
+ */
271
+ static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
272
+ if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
273
+ else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
274
+ else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
275
+ else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
276
+ return CblasNoTrans;
277
+ }
278
+
279
+ /*
280
+ * Interprets cblas argument which could be :left or :right
281
+ *
282
+ * Called by nm_cblas_trsm -- basically inline
283
+ */
284
+ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
285
+ ID op_id = rb_to_id(op);
286
+ if (op_id == nm_rb_left) return CblasLeft;
287
+ if (op_id == nm_rb_right) return CblasRight;
288
+ rb_raise(rb_eArgError, "Expected :left or :right for side argument");
289
+ return CblasLeft;
290
+ }
291
+
292
+ /*
293
+ * Interprets cblas argument which could be :upper or :lower
294
+ *
295
+ * Called by nm_cblas_trsm -- basically inline
296
+ */
297
+ static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
298
+ ID op_id = rb_to_id(op);
299
+ if (op_id == nm_rb_upper) return CblasUpper;
300
+ if (op_id == nm_rb_lower) return CblasLower;
301
+ rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
302
+ return CblasUpper;
303
+ }
304
+
305
+
306
+ /*
307
+ * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
308
+ *
309
+ * Called by nm_cblas_trsm -- basically inline
310
+ */
311
+ static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
312
+ if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
313
+ return CblasNonUnit;
314
+ }
315
+
316
+ /*
317
+ * Interprets cblas argument which could be :row or :col
318
+ */
319
+ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
320
+ if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
321
+ else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
322
+ rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
323
+ rb_raise(rb_eArgError, "Expected :row or :col for order argument");
324
+ return CblasRowMajor;
325
+ }
326
+
327
+
328
+ /* Call any of the cblas_xgemm functions as directly as possible.
329
+ *
330
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
331
+ *
332
+ * C = alpha*op(A)*op(B) + beta*C
333
+ *
334
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
335
+ *
336
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
337
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
338
+ * expose the ultra-optimized ATLAS versions.
339
+ *
340
+ * == Arguments
341
+ * See: http://www.netlib.org/blas/dgemm.f
342
+ *
343
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible
344
+ * with its arguments?
345
+ *
346
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
347
+ * handling, so you can easily crash Ruby!
348
+ */
349
+ static VALUE nm_cblas_gemm(VALUE self,
350
+ VALUE order,
351
+ VALUE trans_a, VALUE trans_b,
352
+ VALUE m, VALUE n, VALUE k,
353
+ VALUE alpha,
354
+ VALUE a, VALUE lda,
355
+ VALUE b, VALUE ldb,
356
+ VALUE beta,
357
+ VALUE c, VALUE ldc)
358
+ {
359
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
360
+
361
+ dtype_t dtype = NM_DTYPE(a);
362
+
363
+ void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
364
+ *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
365
+ rubyval_to_cval(alpha, dtype, pAlpha);
366
+ rubyval_to_cval(beta, dtype, pBeta);
367
+
368
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
369
+
370
+ return c;
371
+ }
372
+
373
+
374
+ /* Call any of the cblas_xgemv functions as directly as possible.
375
+ *
376
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
377
+ *
378
+ * y = alpha*op(A)*x + beta*y
379
+ *
380
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
381
+ *
382
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
383
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
384
+ * expose the ultra-optimized ATLAS versions.
385
+ *
386
+ * == Arguments
387
+ * See: http://www.netlib.org/blas/dgemm.f
388
+ *
389
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
390
+ * with its arguments?
391
+ *
392
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
393
+ * handling, so you can easily crash Ruby!
394
+ */
395
+ static VALUE nm_cblas_gemv(VALUE self,
396
+ VALUE trans_a,
397
+ VALUE m, VALUE n,
398
+ VALUE alpha,
399
+ VALUE a, VALUE lda,
400
+ VALUE x, VALUE incx,
401
+ VALUE beta,
402
+ VALUE y, VALUE incy)
403
+ {
404
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE trans_a, int m, int n, void* alpha, void* a, int lda, void* x, int incx, void* beta, void* y, int incy);
405
+
406
+ dtype_t dtype = NM_DTYPE(a);
407
+
408
+ void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
409
+ *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
410
+ rubyval_to_cval(alpha, dtype, pAlpha);
411
+ rubyval_to_cval(beta, dtype, pBeta);
412
+
413
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
414
+ }
415
+
416
+
417
+ static VALUE nm_cblas_trsm(VALUE self,
418
+ VALUE order,
419
+ VALUE side, VALUE uplo,
420
+ VALUE trans_a, VALUE diag,
421
+ VALUE m, VALUE n,
422
+ VALUE alpha,
423
+ VALUE a, VALUE lda,
424
+ VALUE b, VALUE ldb)
425
+ {
426
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
427
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
428
+ const int, const int, const void* alpha, const void* a,
429
+ const int lda, void* b, const int ldb) = {
430
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
431
+ nm::math::cblas_trsm<float>,
432
+ nm::math::cblas_trsm<double>,
433
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
434
+ nm::math::cblas_trsm<nm::Rational32>,
435
+ nm::math::cblas_trsm<nm::Rational64>,
436
+ nm::math::cblas_trsm<nm::Rational128>,
437
+ nm::math::cblas_trsm<nm::RubyObject>
438
+ };
439
+
440
+ dtype_t dtype = NM_DTYPE(a);
441
+
442
+ void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
443
+ rubyval_to_cval(alpha, dtype, pAlpha);
444
+
445
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
446
+
447
+ return Qtrue;
448
+ }
449
+
450
+
451
+ /*
452
+ * Based on LAPACK's dscal function, but for any dtype.
453
+ *
454
+ * In-place modification; returns the modified vector as well.
455
+ */
456
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
457
+ dtype_t dtype = NM_DTYPE(vector);
458
+
459
+ void* da = ALLOCA_N(char, DTYPE_SIZES[dtype]);
460
+ rubyval_to_cval(scale, dtype, da);
461
+
462
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
463
+
464
+ ttable[dtype](FIX2INT(n), da, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));
465
+
466
+ return vector;
467
+ }
468
+
469
+
470
+ /* Call any of the clpack_xgetrf functions as directly as possible.
471
+ *
472
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
473
+ * matrix A using partial pivoting with row interchanges.
474
+ *
475
+ * The factorization has the form:
476
+ * A = P * L * U
477
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
478
+ * and U is upper triangular (upper trapezoidal if m < n).
479
+ *
480
+ * This is the right-looking level 3 BLAS version of the algorithm.
481
+ *
482
+ * == Arguments
483
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
484
+ * (You don't need argument 5; this is the value returned by this function.)
485
+ *
486
+ * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
487
+ * with its arguments?
488
+ *
489
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
490
+ * handling, so you can easily crash Ruby!
491
+ *
492
+ * Returns an array giving the pivot indices (normally these are argument #5).
493
+ */
494
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
495
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
496
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
497
+ nm::math::clapack_getrf<float>,
498
+ nm::math::clapack_getrf<double>,
499
+ #ifdef HAVE_CLAPACK_H
500
+ clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
501
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
502
+ nm::math::clapack_getrf<nm::Complex64>,
503
+ nm::math::clapack_getrf<nm::Complex128>,
504
+ #endif
505
+ nm::math::clapack_getrf<nm::Rational32>,
506
+ nm::math::clapack_getrf<nm::Rational64>,
507
+ nm::math::clapack_getrf<nm::Rational128>,
508
+ nm::math::clapack_getrf<nm::RubyObject>
509
+ };
510
+
511
+ int M = FIX2INT(m),
512
+ N = FIX2INT(n);
513
+
514
+ // Allocate the pivot index array, which is of size MIN(M, N).
515
+ size_t ipiv_size = std::min(M,N);
516
+ int* ipiv = ALLOCA_N(int, ipiv_size);
517
+
518
+ // Call either our version of getrf or the LAPACK version.
519
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
520
+
521
+ // Result will be stored in a. We return ipiv as an array.
522
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
523
+ for (size_t i = 0; i < ipiv_size; ++i) {
524
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
525
+ }
526
+
527
+ return ipiv_array;
528
+ }
529
+
530
+
531
+ /*
532
+ * C accessor for calculating an exact determinant.
533
+ */
534
+ void nm_math_det_exact(const int M, const void* elements, const int lda, dtype_t dtype, void* result) {
535
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
536
+
537
+ ttable[dtype](M, elements, lda, result);
538
+ }
539
+
540
+
/*
 * Transposes a row-major dense matrix of opaque, fixed-size elements.
 *
 *  M, N          rows and columns of the source A; the destination B is N-by-M
 *  A, lda        source buffer and its row stride, in elements
 *  B, ldb        destination buffer and its row stride, in elements
 *  element_size  size of one element, in bytes
 *
 * Allocates nothing: each element is moved with a single memcpy, so
 * B[i][j] = A[j][i]. Destination bytes outside the N-by-M region (for
 * example row padding when ldb > M) are left untouched.
 */
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
  const char* src = reinterpret_cast<const char*>(A);
  char*       dst = reinterpret_cast<char*>(B);

  for (size_t col = 0; col < N; ++col) {      // column of A == row of B
    for (size_t row = 0; row < M; ++row) {    // row of A    == column of B
      const size_t from = (row*lda + col) * element_size;
      const size_t to   = (col*ldb + row) * element_size;

      memcpy(dst + to, src + from, element_size);
    }
  }
}
555
+
556
+
557
+ } // end of extern "C" block