nmatrix 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +20 -55
- data/ext/nmatrix/data/data.cpp +11 -44
- data/ext/nmatrix/data/data.h +174 -311
- data/ext/nmatrix/data/meta.h +1 -7
- data/ext/nmatrix/data/ruby_object.h +3 -85
- data/ext/nmatrix/extconf.rb +2 -73
- data/ext/nmatrix/math.cpp +170 -813
- data/ext/nmatrix/math/asum.h +2 -25
- data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +2 -32
- data/ext/nmatrix/math/gemv.h +1 -35
- data/ext/nmatrix/math/getrf.h +21 -6
- data/ext/nmatrix/math/getrs.h +0 -8
- data/ext/nmatrix/math/imax.h +0 -22
- data/ext/nmatrix/math/long_dtype.h +0 -3
- data/ext/nmatrix/math/math.h +11 -337
- data/ext/nmatrix/math/nrm2.h +2 -23
- data/ext/nmatrix/math/rot.h +1 -25
- data/ext/nmatrix/math/rotg.h +4 -13
- data/ext/nmatrix/math/scal.h +0 -22
- data/ext/nmatrix/math/trsm.h +0 -55
- data/ext/nmatrix/math/util.h +148 -0
- data/ext/nmatrix/nmatrix.cpp +0 -14
- data/ext/nmatrix/nmatrix.h +92 -84
- data/ext/nmatrix/ruby_constants.cpp +0 -2
- data/ext/nmatrix/ruby_constants.h +0 -2
- data/ext/nmatrix/ruby_nmatrix.c +86 -45
- data/ext/nmatrix/storage/dense/dense.cpp +1 -7
- data/ext/nmatrix/storage/storage.h +0 -1
- data/ext/nmatrix/ttable_helper.rb +0 -6
- data/ext/nmatrix/util/io.cpp +1 -1
- data/lib/nmatrix.rb +1 -19
- data/lib/nmatrix/blas.rb +33 -11
- data/lib/nmatrix/io/market.rb +3 -3
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +382 -131
- data/lib/nmatrix/monkeys.rb +2 -3
- data/lib/nmatrix/nmatrix.rb +166 -13
- data/lib/nmatrix/shortcuts.rb +72 -7
- data/lib/nmatrix/version.rb +2 -2
- data/spec/00_nmatrix_spec.rb +154 -5
- data/spec/02_slice_spec.rb +2 -6
- data/spec/03_nmatrix_monkeys_spec.rb +7 -1
- data/spec/blas_spec.rb +60 -33
- data/spec/homogeneous_spec.rb +10 -10
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/math_spec.rb +436 -52
- data/spec/shortcuts_spec.rb +28 -4
- data/spec/spec_helper.rb +14 -2
- data/spec/utm5940.mtx +83844 -0
- metadata +49 -76
- data/.gitignore +0 -27
- data/.rspec +0 -2
- data/.travis.yml +0 -15
- data/CONTRIBUTING.md +0 -82
- data/Gemfile +0 -2
- data/History.txt +0 -677
- data/LICENSE.txt +0 -23
- data/Manifest.txt +0 -92
- data/README.rdoc +0 -150
- data/Rakefile +0 -216
- data/ext/nmatrix/data/rational.h +0 -440
- data/ext/nmatrix/math/geev.h +0 -82
- data/ext/nmatrix/math/ger.h +0 -96
- data/ext/nmatrix/math/gesdd.h +0 -80
- data/ext/nmatrix/math/gesvd.h +0 -78
- data/ext/nmatrix/math/getf2.h +0 -86
- data/ext/nmatrix/math/getri.h +0 -108
- data/ext/nmatrix/math/potrs.h +0 -129
- data/ext/nmatrix/math/swap.h +0 -52
- data/lib/nmatrix/lapack.rb +0 -240
- data/nmatrix.gemspec +0 -55
- data/scripts/mac-brew-gcc.sh +0 -50
- data/scripts/mac-mavericks-brew-gcc.sh +0 -22
- data/spec/lapack_spec.rb +0 -459
data/ext/nmatrix/math/gemm.h
CHANGED
@@ -30,14 +30,8 @@
|
|
30
30
|
#ifndef GEMM_H
|
31
31
|
# define GEMM_H
|
32
32
|
|
33
|
-
|
34
|
-
#
|
35
|
-
#include <cblas.h>
|
36
|
-
#elif defined HAVE_ATLAS_CBLAS_H
|
37
|
-
#include <atlas/cblas.h>
|
38
|
-
#endif
|
39
|
-
}
|
40
|
-
|
33
|
+
#include "cblas_enums.h"
|
34
|
+
#include "math/long_dtype.h"
|
41
35
|
|
42
36
|
namespace nm { namespace math {
|
43
37
|
/*
|
@@ -242,30 +236,6 @@ inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
|
|
242
236
|
}
|
243
237
|
|
244
238
|
|
245
|
-
template <>
|
246
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
247
|
-
const float* alpha, const float* A, const int lda, const float* B, const int ldb, const float* beta, float* C, const int ldc) {
|
248
|
-
cblas_sgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc);
|
249
|
-
}
|
250
|
-
|
251
|
-
template <>
|
252
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
253
|
-
const double* alpha, const double* A, const int lda, const double* B, const int ldb, const double* beta, double* C, const int ldc) {
|
254
|
-
cblas_dgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc);
|
255
|
-
}
|
256
|
-
|
257
|
-
template <>
|
258
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
259
|
-
const Complex64* alpha, const Complex64* A, const int lda, const Complex64* B, const int ldb, const Complex64* beta, Complex64* C, const int ldc) {
|
260
|
-
cblas_cgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
|
261
|
-
}
|
262
|
-
|
263
|
-
template <>
|
264
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
265
|
-
const Complex128* alpha, const Complex128* A, const int lda, const Complex128* B, const int ldb, const Complex128* beta, Complex128* C, const int ldc) {
|
266
|
-
cblas_zgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
|
267
|
-
}
|
268
|
-
|
269
239
|
}} // end of namespace nm::math
|
270
240
|
|
271
241
|
#endif // GEMM_H
|
data/ext/nmatrix/math/gemv.h
CHANGED
@@ -30,14 +30,7 @@
|
|
30
30
|
#ifndef GEMV_H
|
31
31
|
# define GEMV_H
|
32
32
|
|
33
|
-
|
34
|
-
#if defined HAVE_CBLAS_H
|
35
|
-
#include <cblas.h>
|
36
|
-
#elif defined HAVE_ATLAS_CBLAS_H
|
37
|
-
#include <atlas/cblas.h>
|
38
|
-
#endif
|
39
|
-
}
|
40
|
-
|
33
|
+
#include "math/long_dtype.h"
|
41
34
|
|
42
35
|
namespace nm { namespace math {
|
43
36
|
|
@@ -179,33 +172,6 @@ inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, con
|
|
179
172
|
return true;
|
180
173
|
} // end of GEMV
|
181
174
|
|
182
|
-
template <>
|
183
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const float* alpha, const float* A, const int lda,
|
184
|
-
const float* X, const int incX, const float* beta, float* Y, const int incY) {
|
185
|
-
cblas_sgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY);
|
186
|
-
return true;
|
187
|
-
}
|
188
|
-
|
189
|
-
template <>
|
190
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const double* alpha, const double* A, const int lda,
|
191
|
-
const double* X, const int incX, const double* beta, double* Y, const int incY) {
|
192
|
-
cblas_dgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY);
|
193
|
-
return true;
|
194
|
-
}
|
195
|
-
|
196
|
-
template <>
|
197
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex64* alpha, const Complex64* A, const int lda,
|
198
|
-
const Complex64* X, const int incX, const Complex64* beta, Complex64* Y, const int incY) {
|
199
|
-
cblas_cgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
|
200
|
-
return true;
|
201
|
-
}
|
202
|
-
|
203
|
-
template <>
|
204
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex128* alpha, const Complex128* A, const int lda,
|
205
|
-
const Complex128* X, const int incX, const Complex128* beta, Complex128* Y, const int incY) {
|
206
|
-
cblas_zgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
|
207
|
-
return true;
|
208
|
-
}
|
209
175
|
|
210
176
|
}} // end of namespace nm::math
|
211
177
|
|
data/ext/nmatrix/math/getrf.h
CHANGED
@@ -59,6 +59,13 @@
|
|
59
59
|
#ifndef GETRF_H
|
60
60
|
#define GETRF_H
|
61
61
|
|
62
|
+
#include "math/laswp.h"
|
63
|
+
#include "math/math.h"
|
64
|
+
#include "math/trsm.h"
|
65
|
+
#include "math/gemm.h"
|
66
|
+
#include "math/imax.h"
|
67
|
+
#include "math/scal.h"
|
68
|
+
|
62
69
|
namespace nm { namespace math {
|
63
70
|
|
64
71
|
/* Numeric inverse -- usually just 1 / f, but a little more complicated for complex. */
|
@@ -69,8 +76,6 @@ inline DType numeric_inverse(const DType& n) {
|
|
69
76
|
template <> inline float numeric_inverse(const float& n) { return 1 / n; }
|
70
77
|
template <> inline double numeric_inverse(const double& n) { return 1 / n; }
|
71
78
|
|
72
|
-
|
73
|
-
|
74
79
|
/*
|
75
80
|
* Templated version of row-order and column-order getrf, derived from ATL_getrfR.c (from ATLAS 3.8.0).
|
76
81
|
*
|
@@ -109,7 +114,12 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
|
|
109
114
|
if (N_ul > NB) N_ul = ATL_MulByNB(ATL_DivByNB(N_ul));
|
110
115
|
#endif
|
111
116
|
|
112
|
-
int N_dr
|
117
|
+
int N_dr;
|
118
|
+
if (RowMajor) {
|
119
|
+
N_dr = M - N_ul;
|
120
|
+
} else {
|
121
|
+
N_dr = N - N_ul;
|
122
|
+
}
|
113
123
|
|
114
124
|
int i = RowMajor ? getrf_nothrow<true,DType>(N_ul, N, A, lda, ipiv) : getrf_nothrow<false,DType>(M, N_ul, A, lda, ipiv);
|
115
125
|
|
@@ -135,7 +145,7 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
|
|
135
145
|
nm::math::laswp<DType>(N_dr, Ac, lda, 0, N_ul, ipiv, 1);
|
136
146
|
|
137
147
|
nm::math::trsm<DType>(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N_ul, N_dr, one, A, lda, Ac, lda);
|
138
|
-
nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one,
|
148
|
+
nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one, &(A[N_ul]), lda, Ac, lda, &one, An, lda);
|
139
149
|
|
140
150
|
i = getrf_nothrow<false,DType>(M-N_ul, N_dr, An, lda, ipiv+N_ul);
|
141
151
|
}
|
@@ -148,9 +158,14 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
|
|
148
158
|
|
149
159
|
nm::math::laswp<DType>(N_ul, A, lda, N_ul, MN, ipiv, 1); /* apply pivots */
|
150
160
|
|
151
|
-
} else if (MN == 1) { // there's another case for the colmajor version, but
|
161
|
+
} else if (MN == 1) { // there's another case for the colmajor version, but it doesn't seem to be necessary.
|
152
162
|
|
153
|
-
int i
|
163
|
+
int i;
|
164
|
+
if (RowMajor) {
|
165
|
+
i = *ipiv = nm::math::imax<DType>(N, A, 1); // cblas_iamax(N, A, 1);
|
166
|
+
} else {
|
167
|
+
i = *ipiv = nm::math::imax<DType>(M, A, 1);
|
168
|
+
}
|
154
169
|
|
155
170
|
DType tmp = A[i];
|
156
171
|
if (tmp != 0) {
|
data/ext/nmatrix/math/getrs.h
CHANGED
data/ext/nmatrix/math/imax.h
CHANGED
@@ -69,28 +69,6 @@ inline int imax(const int n, const DType *x, const int incx) {
|
|
69
69
|
return imax;
|
70
70
|
}
|
71
71
|
|
72
|
-
#if defined HAVE_CBLAS_H || defined HAVE_ATLAS_CBLAS_H
|
73
|
-
template<>
|
74
|
-
inline int imax(const int n, const float* x, const int incx) {
|
75
|
-
return cblas_isamax(n, x, incx);
|
76
|
-
}
|
77
|
-
|
78
|
-
template<>
|
79
|
-
inline int imax(const int n, const double* x, const int incx) {
|
80
|
-
return cblas_idamax(n, x, incx);
|
81
|
-
}
|
82
|
-
|
83
|
-
template<>
|
84
|
-
inline int imax(const int n, const Complex64* x, const int incx) {
|
85
|
-
return cblas_icamax(n, x, incx);
|
86
|
-
}
|
87
|
-
|
88
|
-
template <>
|
89
|
-
inline int imax(const int n, const Complex128* x, const int incx) {
|
90
|
-
return cblas_izamax(n, x, incx);
|
91
|
-
}
|
92
|
-
#endif
|
93
|
-
|
94
72
|
template<typename DType>
|
95
73
|
inline int cblas_imax(const int n, const void* x, const int incx) {
|
96
74
|
return imax<DType>(n, reinterpret_cast<const DType*>(x), incx);
|
@@ -42,9 +42,6 @@ namespace nm { namespace math {
|
|
42
42
|
template <> struct LongDType<double> { typedef double type; };
|
43
43
|
template <> struct LongDType<Complex64> { typedef Complex128 type; };
|
44
44
|
template <> struct LongDType<Complex128> { typedef Complex128 type; };
|
45
|
-
template <> struct LongDType<Rational32> { typedef Rational128 type; };
|
46
|
-
template <> struct LongDType<Rational64> { typedef Rational128 type; };
|
47
|
-
template <> struct LongDType<Rational128> { typedef Rational128 type; };
|
48
45
|
template <> struct LongDType<RubyObject> { typedef RubyObject type; };
|
49
46
|
|
50
47
|
}} // end of namespace nm::math
|
data/ext/nmatrix/math/math.h
CHANGED
@@ -68,19 +68,7 @@
|
|
68
68
|
* Standard Includes
|
69
69
|
*/
|
70
70
|
|
71
|
-
|
72
|
-
#if defined HAVE_CBLAS_H
|
73
|
-
#include <cblas.h>
|
74
|
-
#elif defined HAVE_ATLAS_CBLAS_H
|
75
|
-
#include <atlas/cblas.h>
|
76
|
-
#endif
|
77
|
-
|
78
|
-
#if defined HAVE_CLAPACK_H
|
79
|
-
#include <clapack.h>
|
80
|
-
#elif defined HAVE_ATLAS_CLAPACK_H
|
81
|
-
#include <atlas/clapack.h>
|
82
|
-
#endif
|
83
|
-
}
|
71
|
+
#include "cblas_enums.h"
|
84
72
|
|
85
73
|
#include <algorithm> // std::min, std::max
|
86
74
|
#include <limits> // std::numeric_limits
|
@@ -103,11 +91,18 @@ extern "C" {
|
|
103
91
|
/*
|
104
92
|
* C accessors.
|
105
93
|
*/
|
106
|
-
|
107
|
-
void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
|
108
|
-
void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
|
94
|
+
|
109
95
|
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size);
|
110
96
|
void nm_math_init_blas(void);
|
97
|
+
|
98
|
+
/*
|
99
|
+
* Pure math implementations.
|
100
|
+
*/
|
101
|
+
void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv);
|
102
|
+
void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
|
103
|
+
void nm_math_hessenberg(VALUE a);
|
104
|
+
void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
|
105
|
+
void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
|
111
106
|
}
|
112
107
|
|
113
108
|
|
@@ -123,94 +118,6 @@ namespace nm {
|
|
123
118
|
* Functions
|
124
119
|
*/
|
125
120
|
|
126
|
-
|
127
|
-
template <typename DType>
|
128
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
129
|
-
const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) {
|
130
|
-
rb_raise(rb_eNotImpError, "syrk not yet implemented for non-BLAS dtypes");
|
131
|
-
}
|
132
|
-
|
133
|
-
template <typename DType>
|
134
|
-
inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
135
|
-
const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) {
|
136
|
-
rb_raise(rb_eNotImpError, "herk not yet implemented for non-BLAS dtypes");
|
137
|
-
}
|
138
|
-
|
139
|
-
template <>
|
140
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
141
|
-
const int K, const float* alpha, const float* A, const int lda, const float* beta, float* C, const int ldc) {
|
142
|
-
cblas_ssyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc);
|
143
|
-
}
|
144
|
-
|
145
|
-
template <>
|
146
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
147
|
-
const int K, const double* alpha, const double* A, const int lda, const double* beta, double* C, const int ldc) {
|
148
|
-
cblas_dsyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc);
|
149
|
-
}
|
150
|
-
|
151
|
-
template <>
|
152
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
153
|
-
const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) {
|
154
|
-
cblas_csyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
|
155
|
-
}
|
156
|
-
|
157
|
-
template <>
|
158
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
159
|
-
const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) {
|
160
|
-
cblas_zsyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
|
161
|
-
}
|
162
|
-
|
163
|
-
|
164
|
-
template <>
|
165
|
-
inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
166
|
-
const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) {
|
167
|
-
cblas_cherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc);
|
168
|
-
}
|
169
|
-
|
170
|
-
template <>
|
171
|
-
inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
172
|
-
const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) {
|
173
|
-
cblas_zherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc);
|
174
|
-
}
|
175
|
-
|
176
|
-
|
177
|
-
template <typename DType>
|
178
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
179
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const DType* alpha,
|
180
|
-
const DType* A, const int lda, DType* B, const int ldb) {
|
181
|
-
rb_raise(rb_eNotImpError, "trmm not yet implemented for non-BLAS dtypes");
|
182
|
-
}
|
183
|
-
|
184
|
-
template <>
|
185
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
186
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const float* alpha,
|
187
|
-
const float* A, const int lda, float* B, const int ldb) {
|
188
|
-
cblas_strmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb);
|
189
|
-
}
|
190
|
-
|
191
|
-
template <>
|
192
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
193
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const double* alpha,
|
194
|
-
const double* A, const int lda, double* B, const int ldb) {
|
195
|
-
cblas_dtrmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb);
|
196
|
-
}
|
197
|
-
|
198
|
-
template <>
|
199
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
200
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex64* alpha,
|
201
|
-
const Complex64* A, const int lda, Complex64* B, const int ldb) {
|
202
|
-
cblas_ctrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb);
|
203
|
-
}
|
204
|
-
|
205
|
-
template <>
|
206
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
207
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex128* alpha,
|
208
|
-
const Complex128* A, const int lda, Complex128* B, const int ldb) {
|
209
|
-
cblas_ztrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb);
|
210
|
-
}
|
211
|
-
|
212
|
-
|
213
|
-
|
214
121
|
// Yale: numeric matrix multiply c=a*b
|
215
122
|
template <typename DType>
|
216
123
|
inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
|
@@ -502,74 +409,6 @@ inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType*
|
|
502
409
|
}
|
503
410
|
|
504
411
|
|
505
|
-
/*
|
506
|
-
* From ATLAS 3.8.0:
|
507
|
-
*
|
508
|
-
* Computes one of two LU factorizations based on the setting of the Order
|
509
|
-
* parameter, as follows:
|
510
|
-
* ----------------------------------------------------------------------------
|
511
|
-
* Order == CblasColMajor
|
512
|
-
* Column-major factorization of form
|
513
|
-
* A = P * L * U
|
514
|
-
* where P is a row-permutation matrix, L is lower triangular with unit
|
515
|
-
* diagonal elements (lower trapazoidal if M > N), and U is upper triangular
|
516
|
-
* (upper trapazoidal if M < N).
|
517
|
-
*
|
518
|
-
* ----------------------------------------------------------------------------
|
519
|
-
* Order == CblasRowMajor
|
520
|
-
* Row-major factorization of form
|
521
|
-
* A = P * L * U
|
522
|
-
* where P is a column-permutation matrix, L is lower triangular (lower
|
523
|
-
* trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
|
524
|
-
* trapazoidal if M < N).
|
525
|
-
*
|
526
|
-
* ============================================================================
|
527
|
-
* Let IERR be the return value of the function:
|
528
|
-
* If IERR == 0, successful exit.
|
529
|
-
* If (IERR < 0) the -IERR argument had an illegal value
|
530
|
-
* If (IERR > 0 && Order == CblasColMajor)
|
531
|
-
* U(i-1,i-1) is exactly zero. The factorization has been completed,
|
532
|
-
* but the factor U is exactly singular, and division by zero will
|
533
|
-
* occur if it is used to solve a system of equations.
|
534
|
-
* If (IERR > 0 && Order == CblasRowMajor)
|
535
|
-
* L(i-1,i-1) is exactly zero. The factorization has been completed,
|
536
|
-
* but the factor L is exactly singular, and division by zero will
|
537
|
-
* occur if it is used to solve a system of equations.
|
538
|
-
*/
|
539
|
-
template <typename DType>
|
540
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
|
541
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
542
|
-
rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes");
|
543
|
-
#else
|
544
|
-
rb_raise(rb_eNotImpError, "only CLAPACK version implemented thus far");
|
545
|
-
#endif
|
546
|
-
return 0;
|
547
|
-
}
|
548
|
-
|
549
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
550
|
-
template <>
|
551
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
|
552
|
-
return clapack_spotrf(order, uplo, N, A, lda);
|
553
|
-
}
|
554
|
-
|
555
|
-
template <>
|
556
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) {
|
557
|
-
return clapack_dpotrf(order, uplo, N, A, lda);
|
558
|
-
}
|
559
|
-
|
560
|
-
template <>
|
561
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) {
|
562
|
-
return clapack_cpotrf(order, uplo, N, reinterpret_cast<void*>(A), lda);
|
563
|
-
}
|
564
|
-
|
565
|
-
template <>
|
566
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) {
|
567
|
-
return clapack_zpotrf(order, uplo, N, reinterpret_cast<void*>(A), lda);
|
568
|
-
}
|
569
|
-
#endif
|
570
|
-
|
571
|
-
|
572
|
-
|
573
412
|
// Copies an upper row-major array from U, zeroing U; U is unit, so diagonal is not copied.
|
574
413
|
//
|
575
414
|
// From ATLAS 3.8.0.
|
@@ -875,110 +714,6 @@ int getri(const int N, DType* A, const int lda, const int* ipiv, DType* wrk, con
|
|
875
714
|
}
|
876
715
|
*/
|
877
716
|
|
878
|
-
|
879
|
-
|
880
|
-
template <bool is_complex, typename DType>
|
881
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
|
882
|
-
|
883
|
-
int Nleft, Nright;
|
884
|
-
const DType ONE = 1;
|
885
|
-
DType *G, *U0 = A, *U1;
|
886
|
-
|
887
|
-
if (N > 1) {
|
888
|
-
Nleft = N >> 1;
|
889
|
-
#ifdef NB
|
890
|
-
if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
|
891
|
-
#endif
|
892
|
-
|
893
|
-
Nright = N - Nleft;
|
894
|
-
|
895
|
-
// FIXME: There's a simpler way to write this next block, but I'm way too tired to work it out right now.
|
896
|
-
if (uplo == CblasUpper) {
|
897
|
-
if (order == CblasRowMajor) {
|
898
|
-
G = A + Nleft;
|
899
|
-
U1 = G + Nleft * lda;
|
900
|
-
} else {
|
901
|
-
G = A + Nleft * lda;
|
902
|
-
U1 = G + Nleft;
|
903
|
-
}
|
904
|
-
} else {
|
905
|
-
if (order == CblasRowMajor) {
|
906
|
-
G = A + Nleft * lda;
|
907
|
-
U1 = G + Nleft;
|
908
|
-
} else {
|
909
|
-
G = A + Nleft;
|
910
|
-
U1 = G + Nleft * lda;
|
911
|
-
}
|
912
|
-
}
|
913
|
-
|
914
|
-
lauum<is_complex, DType>(order, uplo, Nleft, U0, lda);
|
915
|
-
|
916
|
-
if (is_complex) {
|
917
|
-
|
918
|
-
nm::math::herk<DType>(order, uplo,
|
919
|
-
uplo == CblasLower ? CblasConjTrans : CblasNoTrans,
|
920
|
-
Nleft, Nright, &ONE, G, lda, &ONE, U0, lda);
|
921
|
-
|
922
|
-
nm::math::trmm<DType>(order, CblasLeft, uplo, CblasConjTrans, CblasNonUnit, Nright, Nleft, &ONE, U1, lda, G, lda);
|
923
|
-
} else {
|
924
|
-
nm::math::syrk<DType>(order, uplo,
|
925
|
-
uplo == CblasLower ? CblasTrans : CblasNoTrans,
|
926
|
-
Nleft, Nright, &ONE, G, lda, &ONE, U0, lda);
|
927
|
-
|
928
|
-
nm::math::trmm<DType>(order, CblasLeft, uplo, CblasTrans, CblasNonUnit, Nright, Nleft, &ONE, U1, lda, G, lda);
|
929
|
-
}
|
930
|
-
lauum<is_complex, DType>(order, uplo, Nright, U1, lda);
|
931
|
-
|
932
|
-
} else {
|
933
|
-
*A = *A * *A;
|
934
|
-
}
|
935
|
-
}
|
936
|
-
|
937
|
-
|
938
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
939
|
-
template <bool is_complex>
|
940
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
|
941
|
-
clapack_slauum(order, uplo, N, A, lda);
|
942
|
-
}
|
943
|
-
|
944
|
-
template <bool is_complex>
|
945
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) {
|
946
|
-
clapack_dlauum(order, uplo, N, A, lda);
|
947
|
-
}
|
948
|
-
|
949
|
-
template <bool is_complex>
|
950
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) {
|
951
|
-
clapack_clauum(order, uplo, N, A, lda);
|
952
|
-
}
|
953
|
-
|
954
|
-
template <bool is_complex>
|
955
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) {
|
956
|
-
clapack_zlauum(order, uplo, N, A, lda);
|
957
|
-
}
|
958
|
-
#endif
|
959
|
-
|
960
|
-
|
961
|
-
/*
|
962
|
-
* Function signature conversion for calling LAPACK's lauum functions as directly as possible.
|
963
|
-
*
|
964
|
-
* For documentation: http://www.netlib.org/lapack/double/dlauum.f
|
965
|
-
*
|
966
|
-
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
967
|
-
*/
|
968
|
-
template <bool is_complex, typename DType>
|
969
|
-
inline int clapack_lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
|
970
|
-
if (n < 0) rb_raise(rb_eArgError, "n cannot be less than zero, is set to %d", n);
|
971
|
-
if (lda < n || lda < 1) rb_raise(rb_eArgError, "lda must be >= max(n,1); lda=%d, n=%d\n", lda, n);
|
972
|
-
|
973
|
-
if (uplo == CblasUpper) lauum<is_complex, DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
974
|
-
else lauum<is_complex, DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
975
|
-
|
976
|
-
return 0;
|
977
|
-
}
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
717
|
/*
|
983
718
|
* Macro for declaring LAPACK specializations of the getrf function.
|
984
719
|
*
|
@@ -1003,67 +738,6 @@ LAPACK_GETRF(Complex64, clapack_cgetrf, void)
|
|
1003
738
|
LAPACK_GETRF(Complex128, clapack_zgetrf, void)
|
1004
739
|
*/
|
1005
740
|
|
1006
|
-
|
1007
|
-
|
1008
|
-
/*
|
1009
|
-
* Function signature conversion for calling LAPACK's potrf functions as directly as possible.
|
1010
|
-
*
|
1011
|
-
* For documentation: http://www.netlib.org/lapack/double/dpotrf.f
|
1012
|
-
*
|
1013
|
-
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
1014
|
-
*/
|
1015
|
-
template <typename DType>
|
1016
|
-
inline int clapack_potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
|
1017
|
-
return potrf<DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
1018
|
-
}
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
template <typename DType>
|
1023
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, DType* a, const int lda) {
|
1024
|
-
rb_raise(rb_eNotImpError, "potri not yet implemented for non-BLAS dtypes");
|
1025
|
-
return 0;
|
1026
|
-
}
|
1027
|
-
|
1028
|
-
|
1029
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
1030
|
-
template <>
|
1031
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, float* a, const int lda) {
|
1032
|
-
return clapack_spotri(order, uplo, n, a, lda);
|
1033
|
-
}
|
1034
|
-
|
1035
|
-
template <>
|
1036
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, double* a, const int lda) {
|
1037
|
-
return clapack_dpotri(order, uplo, n, a, lda);
|
1038
|
-
}
|
1039
|
-
|
1040
|
-
template <>
|
1041
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex64* a, const int lda) {
|
1042
|
-
return clapack_cpotri(order, uplo, n, reinterpret_cast<void*>(a), lda);
|
1043
|
-
}
|
1044
|
-
|
1045
|
-
template <>
|
1046
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex128* a, const int lda) {
|
1047
|
-
return clapack_zpotri(order, uplo, n, reinterpret_cast<void*>(a), lda);
|
1048
|
-
}
|
1049
|
-
#endif
|
1050
|
-
|
1051
|
-
|
1052
|
-
/*
|
1053
|
-
* Function signature conversion for calling LAPACK's potri functions as directly as possible.
|
1054
|
-
*
|
1055
|
-
* For documentation: http://www.netlib.org/lapack/double/dpotri.f
|
1056
|
-
*
|
1057
|
-
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
1058
|
-
*/
|
1059
|
-
template <typename DType>
|
1060
|
-
inline int clapack_potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
|
1061
|
-
return potri<DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
1062
|
-
}
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
741
|
}} // end namespace nm::math
|
1068
742
|
|
1069
743
|
|