nmatrix 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/.gitignore +27 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +3 -5
  4. data/Guardfile +6 -0
  5. data/History.txt +33 -0
  6. data/Manifest.txt +41 -38
  7. data/README.rdoc +88 -11
  8. data/Rakefile +35 -53
  9. data/ext/nmatrix/data/complex.h +372 -0
  10. data/ext/nmatrix/data/data.cpp +275 -0
  11. data/ext/nmatrix/data/data.h +707 -0
  12. data/ext/nmatrix/data/rational.h +421 -0
  13. data/ext/nmatrix/data/ruby_object.h +446 -0
  14. data/ext/nmatrix/extconf.rb +101 -51
  15. data/ext/nmatrix/new_extconf.rb +56 -0
  16. data/ext/nmatrix/nmatrix.cpp +1609 -0
  17. data/ext/nmatrix/nmatrix.h +265 -849
  18. data/ext/nmatrix/ruby_constants.cpp +134 -0
  19. data/ext/nmatrix/ruby_constants.h +103 -0
  20. data/ext/nmatrix/storage/common.cpp +70 -0
  21. data/ext/nmatrix/storage/common.h +170 -0
  22. data/ext/nmatrix/storage/dense.cpp +665 -0
  23. data/ext/nmatrix/storage/dense.h +116 -0
  24. data/ext/nmatrix/storage/list.cpp +1088 -0
  25. data/ext/nmatrix/storage/list.h +129 -0
  26. data/ext/nmatrix/storage/storage.cpp +658 -0
  27. data/ext/nmatrix/storage/storage.h +99 -0
  28. data/ext/nmatrix/storage/yale.cpp +1601 -0
  29. data/ext/nmatrix/storage/yale.h +208 -0
  30. data/ext/nmatrix/ttable_helper.rb +126 -0
  31. data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
  32. data/ext/nmatrix/util/io.cpp +295 -0
  33. data/ext/nmatrix/util/io.h +117 -0
  34. data/ext/nmatrix/util/lapack.h +1175 -0
  35. data/ext/nmatrix/util/math.cpp +557 -0
  36. data/ext/nmatrix/util/math.h +1363 -0
  37. data/ext/nmatrix/util/sl_list.cpp +475 -0
  38. data/ext/nmatrix/util/sl_list.h +255 -0
  39. data/ext/nmatrix/util/util.h +78 -0
  40. data/lib/nmatrix/blas.rb +70 -0
  41. data/lib/nmatrix/io/mat5_reader.rb +567 -0
  42. data/lib/nmatrix/io/mat_reader.rb +162 -0
  43. data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
  44. data/lib/nmatrix/nmatrix.rb +199 -0
  45. data/lib/nmatrix/nvector.rb +103 -0
  46. data/lib/nmatrix/version.rb +27 -0
  47. data/lib/nmatrix.rb +22 -230
  48. data/nmatrix.gemspec +59 -0
  49. data/scripts/mac-brew-gcc.sh +47 -0
  50. data/spec/4x4_sparse.mat +0 -0
  51. data/spec/4x5_dense.mat +0 -0
  52. data/spec/blas_spec.rb +47 -0
  53. data/spec/elementwise_spec.rb +164 -0
  54. data/spec/io_spec.rb +60 -0
  55. data/spec/lapack_spec.rb +52 -0
  56. data/spec/math_spec.rb +96 -0
  57. data/spec/nmatrix_spec.rb +93 -89
  58. data/spec/nmatrix_yale_spec.rb +52 -36
  59. data/spec/nvector_spec.rb +1 -1
  60. data/spec/slice_spec.rb +257 -0
  61. data/spec/spec_helper.rb +51 -0
  62. data/spec/utm5940.mtx +83844 -0
  63. metadata +113 -71
  64. data/.autotest +0 -23
  65. data/.gemtest +0 -0
  66. data/ext/nmatrix/cblas.c +0 -150
  67. data/ext/nmatrix/dense/blas_header.template.c +0 -52
  68. data/ext/nmatrix/dense/elementwise.template.c +0 -107
  69. data/ext/nmatrix/dense/gemm.template.c +0 -159
  70. data/ext/nmatrix/dense/gemv.template.c +0 -130
  71. data/ext/nmatrix/dense/rationalmath.template.c +0 -68
  72. data/ext/nmatrix/dense.c +0 -307
  73. data/ext/nmatrix/depend +0 -18
  74. data/ext/nmatrix/generator/syntax_tree.rb +0 -481
  75. data/ext/nmatrix/generator.rb +0 -594
  76. data/ext/nmatrix/list.c +0 -774
  77. data/ext/nmatrix/nmatrix.c +0 -1977
  78. data/ext/nmatrix/rational.c +0 -98
  79. data/ext/nmatrix/yale/complexmath.template.c +0 -71
  80. data/ext/nmatrix/yale/elementwise.template.c +0 -46
  81. data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
  82. data/ext/nmatrix/yale/numbmm.template.c +0 -94
  83. data/ext/nmatrix/yale/smmp1.template.c +0 -21
  84. data/ext/nmatrix/yale/smmp2.template.c +0 -43
  85. data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
  86. data/ext/nmatrix/yale/sort_columns.template.c +0 -56
  87. data/ext/nmatrix/yale/symbmm.template.c +0 -54
  88. data/ext/nmatrix/yale/transp.template.c +0 -68
  89. data/ext/nmatrix/yale.c +0 -726
  90. data/lib/array.rb +0 -67
  91. data/spec/syntax_tree_spec.rb +0 -46
@@ -0,0 +1,557 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math.cpp
25
+ //
26
+ // Ruby-exposed BLAS functions.
27
+ //
28
+ // === Procedure for adding LAPACK or CBLAS functions to math.cpp/math.h:
29
+ //
30
+ // This procedure is written as if for a fictional function with double
31
+ // version dbacon, which we'll say is from LAPACK.
32
+ //
33
+ // 1. Write a default templated version which probably returns a boolean.
34
+ // Call it bacon, and put it in math.h.
35
+ //
36
+ // Order will always be row-major, so we don't need to pass that.
37
+ // CBLAS_TRANSPOSE-type arguments, however, should be passed.
38
+ //
39
+ // Otherwise, arguments should look like those in cblas.h or clapack.h:
40
+ //
41
+ // template <typename DType>
42
+ // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
43
+ // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)");
44
+ // }
45
+ //
46
+ // 2. In math.cpp, add a templated inline static version of the function which takes
47
+ // only void* pointers and uses reinterpret_cast to convert them to the
48
+ // proper dtype.
49
+ //
50
+ // This function may also need to switch m and n if these arguments are given.
51
+ //
52
+ // For an example, see cblas_gemm. This function should do nothing other than cast
53
+ // appropriately. If clapack_dbacon, clapack_sbacon, clapack_cbacon, and clapack_zbacon
54
+ // all take void* only, and no other pointers that vary between functions, you can skip
55
+ // this particular step -- as we can call them directly using a custom function pointer
56
+ // array (same function signature!).
57
+ //
58
+ // This version of the function will be the one exposed through NMatrix::LAPACK. We
59
+ // want it to be as close to the actual LAPACK version of the function as possible,
60
+ // and with as few checks as possible.
61
+ //
62
+ // You will probably need a forward declaration in the extern "C" block.
63
+ //
64
+ // Note: In that case, the function you wrote in Step 1 should also take exactly the
65
+ // same arguments as clapack_xbacon. Otherwise Bad Things will happen.
66
+ //
67
+ // 3. In math.cpp, add inline specialized versions of bacon for the different ATLAS types.
68
+ //
69
+ // You could do this with a macro, if the arguments are all similar (see #define LAPACK_GETRF).
70
+ // Or you may prefer to do it by hand:
71
+ //
72
+ // template <>
73
+ // inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
74
+ // clapack_sbacon(trans, M, N, A, ...);
75
+ // return true;
76
+ // }
77
+ //
78
+ // Make sure these functions are in the namespace nm::math.
79
+ //
80
+ // Note that you should do everything in your power here to parse any return values
81
+ // clapack_sbacon may give you. We're not trying very hard in this example, but you might
82
+ // look at getrf to see how it might be done.
83
+ //
84
+ // 4. Expose the function in nm_math_init_blas(), in math.cpp:
85
+ //
86
+ // rb_define_singleton_method(cNMatrix_LAPACK, "clapack_bacon", (METHOD)nm_lapack_bacon, 5);
87
+ //
88
+ // Here, we're telling Ruby that nm_lapack_bacon takes five arguments as a Ruby function.
89
+ //
90
+ // 5. In blas.rb, write a bacon function which accesses clapack_bacon, but does all the
91
+ // sanity checks we left out in step 2.
92
+ //
93
+ // 6. Write tests for NMatrix::LAPACK::bacon (see the existing getrf tests for an example), confirming that it works for the ATLAS dtypes.
94
+ //
95
+ // 7. After you get it working properly with ATLAS, download dbacon.f from NETLIB, and use
96
+ // f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
97
+ // inserts a lot of unnecessary stuff.
98
+ //
99
+ // Copy and paste the output into the default templated function you wrote in Step 1.
100
+ // Fix it so it works as a template instead of just for doubles.
101
+ //
102
+ // 8. Write tests to confirm that it works for integers, rationals, and Ruby objects.
103
+ //
104
+ // 9. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm,
105
+ // or matrix_vector_multiply for cblas_gemv. This step is not mandatory.
106
+ //
107
+ // 10. Pull request!
108
+
109
+
110
+
111
+ /*
112
+ * Project Includes
113
+ */
114
+
115
+ #include "math.h"
116
+ #include "lapack.h"
117
+
118
+ #include "nmatrix.h"
119
+ #include "ruby_constants.h"
120
+
121
+ /*
122
+ * Forward Declarations
123
+ */
124
+
125
+ extern "C" {
126
+ #ifdef HAVE_CLAPACK_H
127
+ #include <clapack.h>
128
+ #endif
129
+
130
+ static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
131
+ VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
132
+
133
+ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
134
+ VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
135
+
136
+ static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
137
+ VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
138
+
139
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
140
+
141
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
142
+
143
+ } // end of extern "C" block
144
+
145
+ ////////////////////
146
+ // Math Functions //
147
+ ////////////////////
148
+
149
+ namespace nm { namespace math {
150
+
151
+ /*
152
+ * Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. The result is stored in result_arg.
153
+ */
154
+ template <typename DType>
155
+ void det_exact(const int M, const void* A_elements, const int lda, void* result_arg) {
156
+ DType* result = reinterpret_cast<DType*>(result_arg);
157
+ const DType* A = reinterpret_cast<const DType*>(A_elements);
158
+
159
+ typename LongDType<DType>::type x, y;
160
+
161
+ if (M == 2) {
162
+ *result = A[0] * A[lda+1] - A[1] * A[lda];
163
+
164
+ } else if (M == 3) {
165
+ x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
166
+ y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
167
+ x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di)
168
+
169
+ y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg
170
+ *result = A[2]*y + x; // c*(dh-eg) + _
171
+ } else if (M < 2) {
172
+ rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
173
+ } else {
174
+ rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
175
+ }
176
+ }
177
+
178
+
179
+
180
+
181
+ /*
182
+ * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
183
+ *
184
+ * For documentation: http://www.netlib.org/blas/dgemm.f
185
+ */
186
+ template <typename DType>
187
+ inline static void cblas_gemm(const enum CBLAS_ORDER order,
188
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
189
+ int m, int n, int k,
190
+ void* alpha,
191
+ void* a, int lda,
192
+ void* b, int ldb,
193
+ void* beta,
194
+ void* c, int ldc)
195
+ {
196
+ gemm<DType>(order, trans_a, trans_b, m, n, k, reinterpret_cast<DType*>(alpha),
197
+ reinterpret_cast<DType*>(a), lda,
198
+ reinterpret_cast<DType*>(b), ldb, reinterpret_cast<DType*>(beta),
199
+ reinterpret_cast<DType*>(c), ldc);
200
+ }
201
+
202
+
203
+ /*
204
+ * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
205
+ *
206
+ * For documentation: http://www.netlib.org/blas/dgemv.f
207
+ */
208
+ template <typename DType>
209
+ inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans_a,
210
+ int m, int n,
211
+ void* alpha,
212
+ void* a, int lda,
213
+ void* x, int incx,
214
+ void* beta,
215
+ void* y, int incy)
216
+ {
217
+ return gemv<DType>(trans_a,
218
+ m, n, reinterpret_cast<DType*>(alpha),
219
+ reinterpret_cast<DType*>(a), lda,
220
+ reinterpret_cast<DType*>(x), incx, reinterpret_cast<DType*>(beta),
221
+ reinterpret_cast<DType*>(y), incy);
222
+ }
223
+
224
+
225
+ /*
226
+ * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
227
+ *
228
+ * For documentation: http://www.netlib.org/blas/dtrsm.f
229
+ */
230
+ template <typename DType>
231
+ inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
232
+ const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
233
+ const int m, const int n, const void* alpha, const void* a,
234
+ const int lda, void* b, const int ldb)
235
+ {
236
+ trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
237
+ reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
238
+ }
239
+
240
+
241
+
242
+ }} // end of namespace nm::math
243
+
244
+
245
+ extern "C" {
246
+
247
+ ///////////////////
248
+ // Ruby Bindings //
249
+ ///////////////////
250
+
251
+ void nm_math_init_blas() {
252
+ cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
253
+
254
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
255
+ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
256
+
257
+ cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
258
+
259
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
260
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
261
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
262
+ }
263
+
264
+
265
+ /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
266
+ * into an enum recognized by cblas.
267
+ *
268
+ * Called by nm_cblas_gemm -- basically inline.
269
+ *
270
+ */
271
+ static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) {
272
+ if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans;
273
+ else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans;
274
+ else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans;
275
+ else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
276
+ return CblasNoTrans;
277
+ }
278
+
279
+ /*
280
+ * Interprets cblas argument which could be :left or :right
281
+ *
282
+ * Called by nm_cblas_trsm -- basically inline
283
+ */
284
+ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
285
+ ID op_id = rb_to_id(op);
286
+ if (op_id == nm_rb_left) return CblasLeft;
287
+ if (op_id == nm_rb_right) return CblasRight;
288
+ rb_raise(rb_eArgError, "Expected :left or :right for side argument");
289
+ return CblasLeft;
290
+ }
291
+
292
+ /*
293
+ * Interprets cblas argument which could be :upper or :lower
294
+ *
295
+ * Called by nm_cblas_trsm -- basically inline
296
+ */
297
+ static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) {
298
+ ID op_id = rb_to_id(op);
299
+ if (op_id == nm_rb_upper) return CblasUpper;
300
+ if (op_id == nm_rb_lower) return CblasLower;
301
+ rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument");
302
+ return CblasUpper;
303
+ }
304
+
305
+
306
+ /*
307
+ * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit)
308
+ *
309
+ * Called by nm_cblas_trsm -- basically inline
310
+ */
311
+ static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) {
312
+ if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit;
313
+ return CblasNonUnit;
314
+ }
315
+
316
+ /*
317
+ * Interprets cblas argument which could be :row or :col
318
+ */
319
+ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) {
320
+ if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor;
321
+ else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") ||
322
+ rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor;
323
+ rb_raise(rb_eArgError, "Expected :row or :col for order argument");
324
+ return CblasRowMajor;
325
+ }
326
+
327
+
328
+ /* Call any of the cblas_xgemm functions as directly as possible.
329
+ *
330
+ * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
331
+ *
332
+ * C = alpha*op(A)*op(B) + beta*C
333
+ *
334
+ * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
335
+ *
336
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
337
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
338
+ * expose the ultra-optimized ATLAS versions.
339
+ *
340
+ * == Arguments
341
+ * See: http://www.netlib.org/blas/dgemm.f
342
+ *
343
+ * You probably don't want to call this function. Instead, why don't you try the gemm wrapper in blas.rb, which is more flexible
344
+ * with its arguments?
345
+ *
346
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
347
+ * handling, so you can easily crash Ruby!
348
+ */
349
+ static VALUE nm_cblas_gemm(VALUE self,
350
+ VALUE order,
351
+ VALUE trans_a, VALUE trans_b,
352
+ VALUE m, VALUE n, VALUE k,
353
+ VALUE alpha,
354
+ VALUE a, VALUE lda,
355
+ VALUE b, VALUE ldb,
356
+ VALUE beta,
357
+ VALUE c, VALUE ldc)
358
+ {
359
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
360
+
361
+ dtype_t dtype = NM_DTYPE(a);
362
+
363
+ void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
364
+ *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
365
+ rubyval_to_cval(alpha, dtype, pAlpha);
366
+ rubyval_to_cval(beta, dtype, pBeta);
367
+
368
+ ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
369
+
370
+ return c;
371
+ }
372
+
373
+
374
+ /* Call any of the cblas_xgemv functions as directly as possible.
375
+ *
376
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
377
+ *
378
+ * y = alpha*op(A)*x + beta*y
379
+ *
380
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
381
+ *
382
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
383
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
384
+ * expose the ultra-optimized ATLAS versions.
385
+ *
386
+ * == Arguments
387
+ * See: http://www.netlib.org/blas/dgemv.f
388
+ *
389
+ * You probably don't want to call this function. Instead, why don't you try the gemv wrapper in blas.rb, which is more flexible
390
+ * with its arguments?
391
+ *
392
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
393
+ * handling, so you can easily crash Ruby!
394
+ */
395
+ static VALUE nm_cblas_gemv(VALUE self,
396
+ VALUE trans_a,
397
+ VALUE m, VALUE n,
398
+ VALUE alpha,
399
+ VALUE a, VALUE lda,
400
+ VALUE x, VALUE incx,
401
+ VALUE beta,
402
+ VALUE y, VALUE incy)
403
+ {
404
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE trans_a, int m, int n, void* alpha, void* a, int lda, void* x, int incx, void* beta, void* y, int incy);
405
+
406
+ dtype_t dtype = NM_DTYPE(a);
407
+
408
+ void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
409
+ *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
410
+ rubyval_to_cval(alpha, dtype, pAlpha);
411
+ rubyval_to_cval(beta, dtype, pBeta);
412
+
413
+ return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
414
+ }
415
+
416
+
417
+ static VALUE nm_cblas_trsm(VALUE self,
418
+ VALUE order,
419
+ VALUE side, VALUE uplo,
420
+ VALUE trans_a, VALUE diag,
421
+ VALUE m, VALUE n,
422
+ VALUE alpha,
423
+ VALUE a, VALUE lda,
424
+ VALUE b, VALUE ldb)
425
+ {
426
+ static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
427
+ const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
428
+ const int, const int, const void* alpha, const void* a,
429
+ const int lda, void* b, const int ldb) = {
430
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
431
+ nm::math::cblas_trsm<float>,
432
+ nm::math::cblas_trsm<double>,
433
+ cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
434
+ nm::math::cblas_trsm<nm::Rational32>,
435
+ nm::math::cblas_trsm<nm::Rational64>,
436
+ nm::math::cblas_trsm<nm::Rational128>,
437
+ nm::math::cblas_trsm<nm::RubyObject>
438
+ };
439
+
440
+ dtype_t dtype = NM_DTYPE(a);
441
+
442
+ void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
443
+ rubyval_to_cval(alpha, dtype, pAlpha);
444
+
445
+ ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
446
+
447
+ return Qtrue;
448
+ }
449
+
450
+
451
+ /*
452
+ * Based on the BLAS dscal function, but for any dtype.
453
+ *
454
+ * In-place modification; returns the modified vector as well.
455
+ */
456
+ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
457
+ dtype_t dtype = NM_DTYPE(vector);
458
+
459
+ void* da = ALLOCA_N(char, DTYPE_SIZES[dtype]);
460
+ rubyval_to_cval(scale, dtype, da);
461
+
462
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
463
+
464
+ ttable[dtype](FIX2INT(n), da, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));
465
+
466
+ return vector;
467
+ }
468
+
469
+
470
+ /* Call any of the clapack_xgetrf functions as directly as possible.
471
+ *
472
+ * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
473
+ * matrix A using partial pivoting with row interchanges.
474
+ *
475
+ * The factorization has the form:
476
+ * A = P * L * U
477
+ * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
478
+ * and U is upper triangular (upper trapezoidal if m < n).
479
+ *
480
+ * This is the right-looking level 3 BLAS version of the algorithm.
481
+ *
482
+ * == Arguments
483
+ * See: http://www.netlib.org/lapack/double/dgetrf.f
484
+ * (You don't need argument 5; this is the value returned by this function.)
485
+ *
486
+ * You probably don't want to call this function. Instead, why don't you try the getrf wrapper in blas.rb, which is more flexible
487
+ * with its arguments?
488
+ *
489
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
490
+ * handling, so you can easily crash Ruby!
491
+ *
492
+ * Returns an array giving the pivot indices (normally these are argument #5).
493
+ */
494
+ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
495
+ static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
496
+ NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
497
+ nm::math::clapack_getrf<float>,
498
+ nm::math::clapack_getrf<double>,
499
+ #ifdef HAVE_CLAPACK_H
500
+ clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
501
+ #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
502
+ nm::math::clapack_getrf<nm::Complex64>,
503
+ nm::math::clapack_getrf<nm::Complex128>,
504
+ #endif
505
+ nm::math::clapack_getrf<nm::Rational32>,
506
+ nm::math::clapack_getrf<nm::Rational64>,
507
+ nm::math::clapack_getrf<nm::Rational128>,
508
+ nm::math::clapack_getrf<nm::RubyObject>
509
+ };
510
+
511
+ int M = FIX2INT(m),
512
+ N = FIX2INT(n);
513
+
514
+ // Allocate the pivot index array, which is of size MIN(M, N).
515
+ size_t ipiv_size = std::min(M,N);
516
+ int* ipiv = ALLOCA_N(int, ipiv_size);
517
+
518
+ // Call either our version of getrf or the LAPACK version.
519
+ ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
520
+
521
+ // Result will be stored in a. We return ipiv as an array.
522
+ VALUE ipiv_array = rb_ary_new2(ipiv_size);
523
+ for (size_t i = 0; i < ipiv_size; ++i) {
524
+ rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
525
+ }
526
+
527
+ return ipiv_array;
528
+ }
529
+
530
+
531
+ /*
532
+ * C accessor for calculating an exact determinant.
533
+ */
534
+ void nm_math_det_exact(const int M, const void* elements, const int lda, dtype_t dtype, void* result) {
535
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
536
+
537
+ ttable[dtype](M, elements, lda, result);
538
+ }
539
+
540
+
541
+ /*
542
+ * Transpose an array of elements that represent a row-major dense matrix. Does not allocate anything; it only copies each element with memcpy.
543
+ */
544
+ void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) {
545
+ for (size_t i = 0; i < N; ++i) {
546
+ for (size_t j = 0; j < M; ++j) {
547
+
548
+ memcpy(reinterpret_cast<char*>(B) + (i*ldb+j)*element_size,
549
+ reinterpret_cast<const char*>(A) + (j*lda+i)*element_size,
550
+ element_size);
551
+
552
+ }
553
+ }
554
+ }
555
+
556
+
557
+ } // end of extern "C" block