nmatrix 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nmatrix/data/complex.h +20 -55
  3. data/ext/nmatrix/data/data.cpp +11 -44
  4. data/ext/nmatrix/data/data.h +174 -311
  5. data/ext/nmatrix/data/meta.h +1 -7
  6. data/ext/nmatrix/data/ruby_object.h +3 -85
  7. data/ext/nmatrix/extconf.rb +2 -73
  8. data/ext/nmatrix/math.cpp +170 -813
  9. data/ext/nmatrix/math/asum.h +2 -25
  10. data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
  11. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  12. data/ext/nmatrix/math/gemm.h +2 -32
  13. data/ext/nmatrix/math/gemv.h +1 -35
  14. data/ext/nmatrix/math/getrf.h +21 -6
  15. data/ext/nmatrix/math/getrs.h +0 -8
  16. data/ext/nmatrix/math/imax.h +0 -22
  17. data/ext/nmatrix/math/long_dtype.h +0 -3
  18. data/ext/nmatrix/math/math.h +11 -337
  19. data/ext/nmatrix/math/nrm2.h +2 -23
  20. data/ext/nmatrix/math/rot.h +1 -25
  21. data/ext/nmatrix/math/rotg.h +4 -13
  22. data/ext/nmatrix/math/scal.h +0 -22
  23. data/ext/nmatrix/math/trsm.h +0 -55
  24. data/ext/nmatrix/math/util.h +148 -0
  25. data/ext/nmatrix/nmatrix.cpp +0 -14
  26. data/ext/nmatrix/nmatrix.h +92 -84
  27. data/ext/nmatrix/ruby_constants.cpp +0 -2
  28. data/ext/nmatrix/ruby_constants.h +0 -2
  29. data/ext/nmatrix/ruby_nmatrix.c +86 -45
  30. data/ext/nmatrix/storage/dense/dense.cpp +1 -7
  31. data/ext/nmatrix/storage/storage.h +0 -1
  32. data/ext/nmatrix/ttable_helper.rb +0 -6
  33. data/ext/nmatrix/util/io.cpp +1 -1
  34. data/lib/nmatrix.rb +1 -19
  35. data/lib/nmatrix/blas.rb +33 -11
  36. data/lib/nmatrix/io/market.rb +3 -3
  37. data/lib/nmatrix/lapack_core.rb +181 -0
  38. data/lib/nmatrix/lapack_plugin.rb +44 -0
  39. data/lib/nmatrix/math.rb +382 -131
  40. data/lib/nmatrix/monkeys.rb +2 -3
  41. data/lib/nmatrix/nmatrix.rb +166 -13
  42. data/lib/nmatrix/shortcuts.rb +72 -7
  43. data/lib/nmatrix/version.rb +2 -2
  44. data/spec/00_nmatrix_spec.rb +154 -5
  45. data/spec/02_slice_spec.rb +2 -6
  46. data/spec/03_nmatrix_monkeys_spec.rb +7 -1
  47. data/spec/blas_spec.rb +60 -33
  48. data/spec/homogeneous_spec.rb +10 -10
  49. data/spec/lapack_core_spec.rb +482 -0
  50. data/spec/math_spec.rb +436 -52
  51. data/spec/shortcuts_spec.rb +28 -4
  52. data/spec/spec_helper.rb +14 -2
  53. data/spec/utm5940.mtx +83844 -0
  54. metadata +49 -76
  55. data/.gitignore +0 -27
  56. data/.rspec +0 -2
  57. data/.travis.yml +0 -15
  58. data/CONTRIBUTING.md +0 -82
  59. data/Gemfile +0 -2
  60. data/History.txt +0 -677
  61. data/LICENSE.txt +0 -23
  62. data/Manifest.txt +0 -92
  63. data/README.rdoc +0 -150
  64. data/Rakefile +0 -216
  65. data/ext/nmatrix/data/rational.h +0 -440
  66. data/ext/nmatrix/math/geev.h +0 -82
  67. data/ext/nmatrix/math/ger.h +0 -96
  68. data/ext/nmatrix/math/gesdd.h +0 -80
  69. data/ext/nmatrix/math/gesvd.h +0 -78
  70. data/ext/nmatrix/math/getf2.h +0 -86
  71. data/ext/nmatrix/math/getri.h +0 -108
  72. data/ext/nmatrix/math/potrs.h +0 -129
  73. data/ext/nmatrix/math/swap.h +0 -52
  74. data/lib/nmatrix/lapack.rb +0 -240
  75. data/nmatrix.gemspec +0 -55
  76. data/scripts/mac-brew-gcc.sh +0 -50
  77. data/scripts/mac-mavericks-brew-gcc.sh +0 -22
  78. data/spec/lapack_spec.rb +0 -459
@@ -30,14 +30,8 @@
30
30
  #ifndef GEMM_H
31
31
  # define GEMM_H
32
32
 
33
- extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
34
- #if defined HAVE_CBLAS_H
35
- #include <cblas.h>
36
- #elif defined HAVE_ATLAS_CBLAS_H
37
- #include <atlas/cblas.h>
38
- #endif
39
- }
40
-
33
+ #include "cblas_enums.h"
34
+ #include "math/long_dtype.h"
41
35
 
42
36
  namespace nm { namespace math {
43
37
  /*
@@ -242,30 +236,6 @@ inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
242
236
  }
243
237
 
244
238
 
245
- template <>
246
- inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
247
- const float* alpha, const float* A, const int lda, const float* B, const int ldb, const float* beta, float* C, const int ldc) {
248
- cblas_sgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc);
249
- }
250
-
251
- template <>
252
- inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
253
- const double* alpha, const double* A, const int lda, const double* B, const int ldb, const double* beta, double* C, const int ldc) {
254
- cblas_dgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc);
255
- }
256
-
257
- template <>
258
- inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
259
- const Complex64* alpha, const Complex64* A, const int lda, const Complex64* B, const int ldb, const Complex64* beta, Complex64* C, const int ldc) {
260
- cblas_cgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
261
- }
262
-
263
- template <>
264
- inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
265
- const Complex128* alpha, const Complex128* A, const int lda, const Complex128* B, const int ldb, const Complex128* beta, Complex128* C, const int ldc) {
266
- cblas_zgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
267
- }
268
-
269
239
  }} // end of namespace nm::math
270
240
 
271
241
  #endif // GEMM_H
@@ -30,14 +30,7 @@
30
30
  #ifndef GEMV_H
31
31
  # define GEMV_H
32
32
 
33
- extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
34
- #if defined HAVE_CBLAS_H
35
- #include <cblas.h>
36
- #elif defined HAVE_ATLAS_CBLAS_H
37
- #include <atlas/cblas.h>
38
- #endif
39
- }
40
-
33
+ #include "math/long_dtype.h"
41
34
 
42
35
  namespace nm { namespace math {
43
36
 
@@ -179,33 +172,6 @@ inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, con
179
172
  return true;
180
173
  } // end of GEMV
181
174
 
182
- template <>
183
- inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const float* alpha, const float* A, const int lda,
184
- const float* X, const int incX, const float* beta, float* Y, const int incY) {
185
- cblas_sgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY);
186
- return true;
187
- }
188
-
189
- template <>
190
- inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const double* alpha, const double* A, const int lda,
191
- const double* X, const int incX, const double* beta, double* Y, const int incY) {
192
- cblas_dgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY);
193
- return true;
194
- }
195
-
196
- template <>
197
- inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex64* alpha, const Complex64* A, const int lda,
198
- const Complex64* X, const int incX, const Complex64* beta, Complex64* Y, const int incY) {
199
- cblas_cgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
200
- return true;
201
- }
202
-
203
- template <>
204
- inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex128* alpha, const Complex128* A, const int lda,
205
- const Complex128* X, const int incX, const Complex128* beta, Complex128* Y, const int incY) {
206
- cblas_zgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
207
- return true;
208
- }
209
175
 
210
176
  }} // end of namespace nm::math
211
177
 
@@ -59,6 +59,13 @@
59
59
  #ifndef GETRF_H
60
60
  #define GETRF_H
61
61
 
62
+ #include "math/laswp.h"
63
+ #include "math/math.h"
64
+ #include "math/trsm.h"
65
+ #include "math/gemm.h"
66
+ #include "math/imax.h"
67
+ #include "math/scal.h"
68
+
62
69
  namespace nm { namespace math {
63
70
 
64
71
  /* Numeric inverse -- usually just 1 / f, but a little more complicated for complex. */
@@ -69,8 +76,6 @@ inline DType numeric_inverse(const DType& n) {
69
76
  template <> inline float numeric_inverse(const float& n) { return 1 / n; }
70
77
  template <> inline double numeric_inverse(const double& n) { return 1 / n; }
71
78
 
72
-
73
-
74
79
  /*
75
80
  * Templated version of row-order and column-order getrf, derived from ATL_getrfR.c (from ATLAS 3.8.0).
76
81
  *
@@ -109,7 +114,12 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
109
114
  if (N_ul > NB) N_ul = ATL_MulByNB(ATL_DivByNB(N_ul));
110
115
  #endif
111
116
 
112
- int N_dr = M - N_ul;
117
+ int N_dr;
118
+ if (RowMajor) {
119
+ N_dr = M - N_ul;
120
+ } else {
121
+ N_dr = N - N_ul;
122
+ }
113
123
 
114
124
  int i = RowMajor ? getrf_nothrow<true,DType>(N_ul, N, A, lda, ipiv) : getrf_nothrow<false,DType>(M, N_ul, A, lda, ipiv);
115
125
 
@@ -135,7 +145,7 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
135
145
  nm::math::laswp<DType>(N_dr, Ac, lda, 0, N_ul, ipiv, 1);
136
146
 
137
147
  nm::math::trsm<DType>(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N_ul, N_dr, one, A, lda, Ac, lda);
138
- nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one, An, lda, Ac, lda, &one, An, lda);
148
+ nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one, &(A[N_ul]), lda, Ac, lda, &one, An, lda);
139
149
 
140
150
  i = getrf_nothrow<false,DType>(M-N_ul, N_dr, An, lda, ipiv+N_ul);
141
151
  }
@@ -148,9 +158,14 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
148
158
 
149
159
  nm::math::laswp<DType>(N_ul, A, lda, N_ul, MN, ipiv, 1); /* apply pivots */
150
160
 
151
- } else if (MN == 1) { // there's another case for the colmajor version, but i don't know that it's that critical. Calls ATLAS LU2, who knows what that does.
161
+ } else if (MN == 1) { // there's another case for the colmajor version, but it doesn't seem to be necessary.
152
162
 
153
- int i = *ipiv = nm::math::imax<DType>(N, A, 1); // cblas_iamax(N, A, 1);
163
+ int i;
164
+ if (RowMajor) {
165
+ i = *ipiv = nm::math::imax<DType>(N, A, 1); // cblas_iamax(N, A, 1);
166
+ } else {
167
+ i = *ipiv = nm::math::imax<DType>(M, A, 1);
168
+ }
154
169
 
155
170
  DType tmp = A[i];
156
171
  if (tmp != 0) {
@@ -59,14 +59,6 @@
59
59
  #ifndef GETRS_H
60
60
  #define GETRS_H
61
61
 
62
- extern "C" {
63
- #if defined HAVE_CBLAS_H
64
- #include <cblas.h>
65
- #elif defined HAVE_ATLAS_CBLAS_H
66
- #include <atlas/cblas.h>
67
- #endif
68
- }
69
-
70
62
  namespace nm { namespace math {
71
63
 
72
64
 
@@ -69,28 +69,6 @@ inline int imax(const int n, const DType *x, const int incx) {
69
69
  return imax;
70
70
  }
71
71
 
72
- #if defined HAVE_CBLAS_H || defined HAVE_ATLAS_CBLAS_H
73
- template<>
74
- inline int imax(const int n, const float* x, const int incx) {
75
- return cblas_isamax(n, x, incx);
76
- }
77
-
78
- template<>
79
- inline int imax(const int n, const double* x, const int incx) {
80
- return cblas_idamax(n, x, incx);
81
- }
82
-
83
- template<>
84
- inline int imax(const int n, const Complex64* x, const int incx) {
85
- return cblas_icamax(n, x, incx);
86
- }
87
-
88
- template <>
89
- inline int imax(const int n, const Complex128* x, const int incx) {
90
- return cblas_izamax(n, x, incx);
91
- }
92
- #endif
93
-
94
72
  template<typename DType>
95
73
  inline int cblas_imax(const int n, const void* x, const int incx) {
96
74
  return imax<DType>(n, reinterpret_cast<const DType*>(x), incx);
@@ -42,9 +42,6 @@ namespace nm { namespace math {
42
42
  template <> struct LongDType<double> { typedef double type; };
43
43
  template <> struct LongDType<Complex64> { typedef Complex128 type; };
44
44
  template <> struct LongDType<Complex128> { typedef Complex128 type; };
45
- template <> struct LongDType<Rational32> { typedef Rational128 type; };
46
- template <> struct LongDType<Rational64> { typedef Rational128 type; };
47
- template <> struct LongDType<Rational128> { typedef Rational128 type; };
48
45
  template <> struct LongDType<RubyObject> { typedef RubyObject type; };
49
46
 
50
47
  }} // end of namespace nm::math
@@ -68,19 +68,7 @@
68
68
  * Standard Includes
69
69
  */
70
70
 
71
- extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
72
- #if defined HAVE_CBLAS_H
73
- #include <cblas.h>
74
- #elif defined HAVE_ATLAS_CBLAS_H
75
- #include <atlas/cblas.h>
76
- #endif
77
-
78
- #if defined HAVE_CLAPACK_H
79
- #include <clapack.h>
80
- #elif defined HAVE_ATLAS_CLAPACK_H
81
- #include <atlas/clapack.h>
82
- #endif
83
- }
71
+ #include "cblas_enums.h"
84
72
 
85
73
  #include <algorithm> // std::min, std::max
86
74
  #include <limits> // std::numeric_limits
@@ -103,11 +91,18 @@ extern "C" {
103
91
  /*
104
92
  * C accessors.
105
93
  */
106
- void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
107
- void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
108
- void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
94
+
109
95
  void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size);
110
96
  void nm_math_init_blas(void);
97
+
98
+ /*
99
+ * Pure math implementations.
100
+ */
101
+ void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv);
102
+ void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
103
+ void nm_math_hessenberg(VALUE a);
104
+ void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
105
+ void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
111
106
  }
112
107
 
113
108
 
@@ -123,94 +118,6 @@ namespace nm {
123
118
  * Functions
124
119
  */
125
120
 
126
-
127
- template <typename DType>
128
- inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
129
- const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) {
130
- rb_raise(rb_eNotImpError, "syrk not yet implemented for non-BLAS dtypes");
131
- }
132
-
133
- template <typename DType>
134
- inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
135
- const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) {
136
- rb_raise(rb_eNotImpError, "herk not yet implemented for non-BLAS dtypes");
137
- }
138
-
139
- template <>
140
- inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
141
- const int K, const float* alpha, const float* A, const int lda, const float* beta, float* C, const int ldc) {
142
- cblas_ssyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc);
143
- }
144
-
145
- template <>
146
- inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
147
- const int K, const double* alpha, const double* A, const int lda, const double* beta, double* C, const int ldc) {
148
- cblas_dsyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc);
149
- }
150
-
151
- template <>
152
- inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
153
- const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) {
154
- cblas_csyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
155
- }
156
-
157
- template <>
158
- inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
159
- const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) {
160
- cblas_zsyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
161
- }
162
-
163
-
164
- template <>
165
- inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
166
- const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) {
167
- cblas_cherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc);
168
- }
169
-
170
- template <>
171
- inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
172
- const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) {
173
- cblas_zherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc);
174
- }
175
-
176
-
177
- template <typename DType>
178
- inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
179
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const DType* alpha,
180
- const DType* A, const int lda, DType* B, const int ldb) {
181
- rb_raise(rb_eNotImpError, "trmm not yet implemented for non-BLAS dtypes");
182
- }
183
-
184
- template <>
185
- inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
186
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const float* alpha,
187
- const float* A, const int lda, float* B, const int ldb) {
188
- cblas_strmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb);
189
- }
190
-
191
- template <>
192
- inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
193
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const double* alpha,
194
- const double* A, const int lda, double* B, const int ldb) {
195
- cblas_dtrmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb);
196
- }
197
-
198
- template <>
199
- inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
200
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex64* alpha,
201
- const Complex64* A, const int lda, Complex64* B, const int ldb) {
202
- cblas_ctrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb);
203
- }
204
-
205
- template <>
206
- inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
207
- const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex128* alpha,
208
- const Complex128* A, const int lda, Complex128* B, const int ldb) {
209
- cblas_ztrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb);
210
- }
211
-
212
-
213
-
214
121
  // Yale: numeric matrix multiply c=a*b
215
122
  template <typename DType>
216
123
  inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
@@ -502,74 +409,6 @@ inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType*
502
409
  }
503
410
 
504
411
 
505
- /*
506
- * From ATLAS 3.8.0:
507
- *
508
- * Computes one of two LU factorizations based on the setting of the Order
509
- * parameter, as follows:
510
- * ----------------------------------------------------------------------------
511
- * Order == CblasColMajor
512
- * Column-major factorization of form
513
- * A = P * L * U
514
- * where P is a row-permutation matrix, L is lower triangular with unit
515
- * diagonal elements (lower trapazoidal if M > N), and U is upper triangular
516
- * (upper trapazoidal if M < N).
517
- *
518
- * ----------------------------------------------------------------------------
519
- * Order == CblasRowMajor
520
- * Row-major factorization of form
521
- * A = P * L * U
522
- * where P is a column-permutation matrix, L is lower triangular (lower
523
- * trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
524
- * trapazoidal if M < N).
525
- *
526
- * ============================================================================
527
- * Let IERR be the return value of the function:
528
- * If IERR == 0, successful exit.
529
- * If (IERR < 0) the -IERR argument had an illegal value
530
- * If (IERR > 0 && Order == CblasColMajor)
531
- * U(i-1,i-1) is exactly zero. The factorization has been completed,
532
- * but the factor U is exactly singular, and division by zero will
533
- * occur if it is used to solve a system of equations.
534
- * If (IERR > 0 && Order == CblasRowMajor)
535
- * L(i-1,i-1) is exactly zero. The factorization has been completed,
536
- * but the factor L is exactly singular, and division by zero will
537
- * occur if it is used to solve a system of equations.
538
- */
539
- template <typename DType>
540
- inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
541
- #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
542
- rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes");
543
- #else
544
- rb_raise(rb_eNotImpError, "only CLAPACK version implemented thus far");
545
- #endif
546
- return 0;
547
- }
548
-
549
- #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
550
- template <>
551
- inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
552
- return clapack_spotrf(order, uplo, N, A, lda);
553
- }
554
-
555
- template <>
556
- inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) {
557
- return clapack_dpotrf(order, uplo, N, A, lda);
558
- }
559
-
560
- template <>
561
- inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) {
562
- return clapack_cpotrf(order, uplo, N, reinterpret_cast<void*>(A), lda);
563
- }
564
-
565
- template <>
566
- inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) {
567
- return clapack_zpotrf(order, uplo, N, reinterpret_cast<void*>(A), lda);
568
- }
569
- #endif
570
-
571
-
572
-
573
412
  // Copies an upper row-major array from U, zeroing U; U is unit, so diagonal is not copied.
574
413
  //
575
414
  // From ATLAS 3.8.0.
@@ -875,110 +714,6 @@ int getri(const int N, DType* A, const int lda, const int* ipiv, DType* wrk, con
875
714
  }
876
715
  */
877
716
 
878
-
879
-
880
- template <bool is_complex, typename DType>
881
- inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
882
-
883
- int Nleft, Nright;
884
- const DType ONE = 1;
885
- DType *G, *U0 = A, *U1;
886
-
887
- if (N > 1) {
888
- Nleft = N >> 1;
889
- #ifdef NB
890
- if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
891
- #endif
892
-
893
- Nright = N - Nleft;
894
-
895
- // FIXME: There's a simpler way to write this next block, but I'm way too tired to work it out right now.
896
- if (uplo == CblasUpper) {
897
- if (order == CblasRowMajor) {
898
- G = A + Nleft;
899
- U1 = G + Nleft * lda;
900
- } else {
901
- G = A + Nleft * lda;
902
- U1 = G + Nleft;
903
- }
904
- } else {
905
- if (order == CblasRowMajor) {
906
- G = A + Nleft * lda;
907
- U1 = G + Nleft;
908
- } else {
909
- G = A + Nleft;
910
- U1 = G + Nleft * lda;
911
- }
912
- }
913
-
914
- lauum<is_complex, DType>(order, uplo, Nleft, U0, lda);
915
-
916
- if (is_complex) {
917
-
918
- nm::math::herk<DType>(order, uplo,
919
- uplo == CblasLower ? CblasConjTrans : CblasNoTrans,
920
- Nleft, Nright, &ONE, G, lda, &ONE, U0, lda);
921
-
922
- nm::math::trmm<DType>(order, CblasLeft, uplo, CblasConjTrans, CblasNonUnit, Nright, Nleft, &ONE, U1, lda, G, lda);
923
- } else {
924
- nm::math::syrk<DType>(order, uplo,
925
- uplo == CblasLower ? CblasTrans : CblasNoTrans,
926
- Nleft, Nright, &ONE, G, lda, &ONE, U0, lda);
927
-
928
- nm::math::trmm<DType>(order, CblasLeft, uplo, CblasTrans, CblasNonUnit, Nright, Nleft, &ONE, U1, lda, G, lda);
929
- }
930
- lauum<is_complex, DType>(order, uplo, Nright, U1, lda);
931
-
932
- } else {
933
- *A = *A * *A;
934
- }
935
- }
936
-
937
-
938
- #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
939
- template <bool is_complex>
940
- inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
941
- clapack_slauum(order, uplo, N, A, lda);
942
- }
943
-
944
- template <bool is_complex>
945
- inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) {
946
- clapack_dlauum(order, uplo, N, A, lda);
947
- }
948
-
949
- template <bool is_complex>
950
- inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) {
951
- clapack_clauum(order, uplo, N, A, lda);
952
- }
953
-
954
- template <bool is_complex>
955
- inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) {
956
- clapack_zlauum(order, uplo, N, A, lda);
957
- }
958
- #endif
959
-
960
-
961
- /*
962
- * Function signature conversion for calling LAPACK's lauum functions as directly as possible.
963
- *
964
- * For documentation: http://www.netlib.org/lapack/double/dlauum.f
965
- *
966
- * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
967
- */
968
- template <bool is_complex, typename DType>
969
- inline int clapack_lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
970
- if (n < 0) rb_raise(rb_eArgError, "n cannot be less than zero, is set to %d", n);
971
- if (lda < n || lda < 1) rb_raise(rb_eArgError, "lda must be >= max(n,1); lda=%d, n=%d\n", lda, n);
972
-
973
- if (uplo == CblasUpper) lauum<is_complex, DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
974
- else lauum<is_complex, DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
975
-
976
- return 0;
977
- }
978
-
979
-
980
-
981
-
982
717
  /*
983
718
  * Macro for declaring LAPACK specializations of the getrf function.
984
719
  *
@@ -1003,67 +738,6 @@ LAPACK_GETRF(Complex64, clapack_cgetrf, void)
1003
738
  LAPACK_GETRF(Complex128, clapack_zgetrf, void)
1004
739
  */
1005
740
 
1006
-
1007
-
1008
- /*
1009
- * Function signature conversion for calling LAPACK's potrf functions as directly as possible.
1010
- *
1011
- * For documentation: http://www.netlib.org/lapack/double/dpotrf.f
1012
- *
1013
- * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
1014
- */
1015
- template <typename DType>
1016
- inline int clapack_potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
1017
- return potrf<DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
1018
- }
1019
-
1020
-
1021
-
1022
- template <typename DType>
1023
- inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, DType* a, const int lda) {
1024
- rb_raise(rb_eNotImpError, "potri not yet implemented for non-BLAS dtypes");
1025
- return 0;
1026
- }
1027
-
1028
-
1029
- #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
1030
- template <>
1031
- inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, float* a, const int lda) {
1032
- return clapack_spotri(order, uplo, n, a, lda);
1033
- }
1034
-
1035
- template <>
1036
- inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, double* a, const int lda) {
1037
- return clapack_dpotri(order, uplo, n, a, lda);
1038
- }
1039
-
1040
- template <>
1041
- inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex64* a, const int lda) {
1042
- return clapack_cpotri(order, uplo, n, reinterpret_cast<void*>(a), lda);
1043
- }
1044
-
1045
- template <>
1046
- inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex128* a, const int lda) {
1047
- return clapack_zpotri(order, uplo, n, reinterpret_cast<void*>(a), lda);
1048
- }
1049
- #endif
1050
-
1051
-
1052
- /*
1053
- * Function signature conversion for calling LAPACK's potri functions as directly as possible.
1054
- *
1055
- * For documentation: http://www.netlib.org/lapack/double/dpotri.f
1056
- *
1057
- * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
1058
- */
1059
- template <typename DType>
1060
- inline int clapack_potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
1061
- return potri<DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
1062
- }
1063
-
1064
-
1065
-
1066
-
1067
741
  }} // end namespace nm::math
1068
742
 
1069
743