nmatrix 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +20 -55
- data/ext/nmatrix/data/data.cpp +11 -44
- data/ext/nmatrix/data/data.h +174 -311
- data/ext/nmatrix/data/meta.h +1 -7
- data/ext/nmatrix/data/ruby_object.h +3 -85
- data/ext/nmatrix/extconf.rb +2 -73
- data/ext/nmatrix/math.cpp +170 -813
- data/ext/nmatrix/math/asum.h +2 -25
- data/ext/nmatrix/math/{inc.h → cblas_enums.h} +11 -22
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +2 -32
- data/ext/nmatrix/math/gemv.h +1 -35
- data/ext/nmatrix/math/getrf.h +21 -6
- data/ext/nmatrix/math/getrs.h +0 -8
- data/ext/nmatrix/math/imax.h +0 -22
- data/ext/nmatrix/math/long_dtype.h +0 -3
- data/ext/nmatrix/math/math.h +11 -337
- data/ext/nmatrix/math/nrm2.h +2 -23
- data/ext/nmatrix/math/rot.h +1 -25
- data/ext/nmatrix/math/rotg.h +4 -13
- data/ext/nmatrix/math/scal.h +0 -22
- data/ext/nmatrix/math/trsm.h +0 -55
- data/ext/nmatrix/math/util.h +148 -0
- data/ext/nmatrix/nmatrix.cpp +0 -14
- data/ext/nmatrix/nmatrix.h +92 -84
- data/ext/nmatrix/ruby_constants.cpp +0 -2
- data/ext/nmatrix/ruby_constants.h +0 -2
- data/ext/nmatrix/ruby_nmatrix.c +86 -45
- data/ext/nmatrix/storage/dense/dense.cpp +1 -7
- data/ext/nmatrix/storage/storage.h +0 -1
- data/ext/nmatrix/ttable_helper.rb +0 -6
- data/ext/nmatrix/util/io.cpp +1 -1
- data/lib/nmatrix.rb +1 -19
- data/lib/nmatrix/blas.rb +33 -11
- data/lib/nmatrix/io/market.rb +3 -3
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +382 -131
- data/lib/nmatrix/monkeys.rb +2 -3
- data/lib/nmatrix/nmatrix.rb +166 -13
- data/lib/nmatrix/shortcuts.rb +72 -7
- data/lib/nmatrix/version.rb +2 -2
- data/spec/00_nmatrix_spec.rb +154 -5
- data/spec/02_slice_spec.rb +2 -6
- data/spec/03_nmatrix_monkeys_spec.rb +7 -1
- data/spec/blas_spec.rb +60 -33
- data/spec/homogeneous_spec.rb +10 -10
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/math_spec.rb +436 -52
- data/spec/shortcuts_spec.rb +28 -4
- data/spec/spec_helper.rb +14 -2
- data/spec/utm5940.mtx +83844 -0
- metadata +49 -76
- data/.gitignore +0 -27
- data/.rspec +0 -2
- data/.travis.yml +0 -15
- data/CONTRIBUTING.md +0 -82
- data/Gemfile +0 -2
- data/History.txt +0 -677
- data/LICENSE.txt +0 -23
- data/Manifest.txt +0 -92
- data/README.rdoc +0 -150
- data/Rakefile +0 -216
- data/ext/nmatrix/data/rational.h +0 -440
- data/ext/nmatrix/math/geev.h +0 -82
- data/ext/nmatrix/math/ger.h +0 -96
- data/ext/nmatrix/math/gesdd.h +0 -80
- data/ext/nmatrix/math/gesvd.h +0 -78
- data/ext/nmatrix/math/getf2.h +0 -86
- data/ext/nmatrix/math/getri.h +0 -108
- data/ext/nmatrix/math/potrs.h +0 -129
- data/ext/nmatrix/math/swap.h +0 -52
- data/lib/nmatrix/lapack.rb +0 -240
- data/nmatrix.gemspec +0 -55
- data/scripts/mac-brew-gcc.sh +0 -50
- data/scripts/mac-mavericks-brew-gcc.sh +0 -22
- data/spec/lapack_spec.rb +0 -459
data/ext/nmatrix/math/gemm.h
CHANGED
@@ -30,14 +30,8 @@
|
|
30
30
|
#ifndef GEMM_H
|
31
31
|
# define GEMM_H
|
32
32
|
|
33
|
-
|
34
|
-
#
|
35
|
-
#include <cblas.h>
|
36
|
-
#elif defined HAVE_ATLAS_CBLAS_H
|
37
|
-
#include <atlas/cblas.h>
|
38
|
-
#endif
|
39
|
-
}
|
40
|
-
|
33
|
+
#include "cblas_enums.h"
|
34
|
+
#include "math/long_dtype.h"
|
41
35
|
|
42
36
|
namespace nm { namespace math {
|
43
37
|
/*
|
@@ -242,30 +236,6 @@ inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
|
|
242
236
|
}
|
243
237
|
|
244
238
|
|
245
|
-
template <>
|
246
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
247
|
-
const float* alpha, const float* A, const int lda, const float* B, const int ldb, const float* beta, float* C, const int ldc) {
|
248
|
-
cblas_sgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc);
|
249
|
-
}
|
250
|
-
|
251
|
-
template <>
|
252
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
253
|
-
const double* alpha, const double* A, const int lda, const double* B, const int ldb, const double* beta, double* C, const int ldc) {
|
254
|
-
cblas_dgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc);
|
255
|
-
}
|
256
|
-
|
257
|
-
template <>
|
258
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
259
|
-
const Complex64* alpha, const Complex64* A, const int lda, const Complex64* B, const int ldb, const Complex64* beta, Complex64* C, const int ldc) {
|
260
|
-
cblas_cgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
|
261
|
-
}
|
262
|
-
|
263
|
-
template <>
|
264
|
-
inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
|
265
|
-
const Complex128* alpha, const Complex128* A, const int lda, const Complex128* B, const int ldb, const Complex128* beta, Complex128* C, const int ldc) {
|
266
|
-
cblas_zgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
|
267
|
-
}
|
268
|
-
|
269
239
|
}} // end of namespace nm::math
|
270
240
|
|
271
241
|
#endif // GEMM_H
|
data/ext/nmatrix/math/gemv.h
CHANGED
@@ -30,14 +30,7 @@
|
|
30
30
|
#ifndef GEMV_H
|
31
31
|
# define GEMV_H
|
32
32
|
|
33
|
-
|
34
|
-
#if defined HAVE_CBLAS_H
|
35
|
-
#include <cblas.h>
|
36
|
-
#elif defined HAVE_ATLAS_CBLAS_H
|
37
|
-
#include <atlas/cblas.h>
|
38
|
-
#endif
|
39
|
-
}
|
40
|
-
|
33
|
+
#include "math/long_dtype.h"
|
41
34
|
|
42
35
|
namespace nm { namespace math {
|
43
36
|
|
@@ -179,33 +172,6 @@ inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, con
|
|
179
172
|
return true;
|
180
173
|
} // end of GEMV
|
181
174
|
|
182
|
-
template <>
|
183
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const float* alpha, const float* A, const int lda,
|
184
|
-
const float* X, const int incX, const float* beta, float* Y, const int incY) {
|
185
|
-
cblas_sgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY);
|
186
|
-
return true;
|
187
|
-
}
|
188
|
-
|
189
|
-
template <>
|
190
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const double* alpha, const double* A, const int lda,
|
191
|
-
const double* X, const int incX, const double* beta, double* Y, const int incY) {
|
192
|
-
cblas_dgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY);
|
193
|
-
return true;
|
194
|
-
}
|
195
|
-
|
196
|
-
template <>
|
197
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex64* alpha, const Complex64* A, const int lda,
|
198
|
-
const Complex64* X, const int incX, const Complex64* beta, Complex64* Y, const int incY) {
|
199
|
-
cblas_cgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
|
200
|
-
return true;
|
201
|
-
}
|
202
|
-
|
203
|
-
template <>
|
204
|
-
inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex128* alpha, const Complex128* A, const int lda,
|
205
|
-
const Complex128* X, const int incX, const Complex128* beta, Complex128* Y, const int incY) {
|
206
|
-
cblas_zgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
|
207
|
-
return true;
|
208
|
-
}
|
209
175
|
|
210
176
|
}} // end of namespace nm::math
|
211
177
|
|
data/ext/nmatrix/math/getrf.h
CHANGED
@@ -59,6 +59,13 @@
|
|
59
59
|
#ifndef GETRF_H
|
60
60
|
#define GETRF_H
|
61
61
|
|
62
|
+
#include "math/laswp.h"
|
63
|
+
#include "math/math.h"
|
64
|
+
#include "math/trsm.h"
|
65
|
+
#include "math/gemm.h"
|
66
|
+
#include "math/imax.h"
|
67
|
+
#include "math/scal.h"
|
68
|
+
|
62
69
|
namespace nm { namespace math {
|
63
70
|
|
64
71
|
/* Numeric inverse -- usually just 1 / f, but a little more complicated for complex. */
|
@@ -69,8 +76,6 @@ inline DType numeric_inverse(const DType& n) {
|
|
69
76
|
template <> inline float numeric_inverse(const float& n) { return 1 / n; }
|
70
77
|
template <> inline double numeric_inverse(const double& n) { return 1 / n; }
|
71
78
|
|
72
|
-
|
73
|
-
|
74
79
|
/*
|
75
80
|
* Templated version of row-order and column-order getrf, derived from ATL_getrfR.c (from ATLAS 3.8.0).
|
76
81
|
*
|
@@ -109,7 +114,12 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
|
|
109
114
|
if (N_ul > NB) N_ul = ATL_MulByNB(ATL_DivByNB(N_ul));
|
110
115
|
#endif
|
111
116
|
|
112
|
-
int N_dr
|
117
|
+
int N_dr;
|
118
|
+
if (RowMajor) {
|
119
|
+
N_dr = M - N_ul;
|
120
|
+
} else {
|
121
|
+
N_dr = N - N_ul;
|
122
|
+
}
|
113
123
|
|
114
124
|
int i = RowMajor ? getrf_nothrow<true,DType>(N_ul, N, A, lda, ipiv) : getrf_nothrow<false,DType>(M, N_ul, A, lda, ipiv);
|
115
125
|
|
@@ -135,7 +145,7 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
|
|
135
145
|
nm::math::laswp<DType>(N_dr, Ac, lda, 0, N_ul, ipiv, 1);
|
136
146
|
|
137
147
|
nm::math::trsm<DType>(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N_ul, N_dr, one, A, lda, Ac, lda);
|
138
|
-
nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one,
|
148
|
+
nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one, &(A[N_ul]), lda, Ac, lda, &one, An, lda);
|
139
149
|
|
140
150
|
i = getrf_nothrow<false,DType>(M-N_ul, N_dr, An, lda, ipiv+N_ul);
|
141
151
|
}
|
@@ -148,9 +158,14 @@ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int*
|
|
148
158
|
|
149
159
|
nm::math::laswp<DType>(N_ul, A, lda, N_ul, MN, ipiv, 1); /* apply pivots */
|
150
160
|
|
151
|
-
} else if (MN == 1) { // there's another case for the colmajor version, but
|
161
|
+
} else if (MN == 1) { // there's another case for the colmajor version, but it doesn't seem to be necessary.
|
152
162
|
|
153
|
-
int i
|
163
|
+
int i;
|
164
|
+
if (RowMajor) {
|
165
|
+
i = *ipiv = nm::math::imax<DType>(N, A, 1); // cblas_iamax(N, A, 1);
|
166
|
+
} else {
|
167
|
+
i = *ipiv = nm::math::imax<DType>(M, A, 1);
|
168
|
+
}
|
154
169
|
|
155
170
|
DType tmp = A[i];
|
156
171
|
if (tmp != 0) {
|
data/ext/nmatrix/math/getrs.h
CHANGED
data/ext/nmatrix/math/imax.h
CHANGED
@@ -69,28 +69,6 @@ inline int imax(const int n, const DType *x, const int incx) {
|
|
69
69
|
return imax;
|
70
70
|
}
|
71
71
|
|
72
|
-
#if defined HAVE_CBLAS_H || defined HAVE_ATLAS_CBLAS_H
|
73
|
-
template<>
|
74
|
-
inline int imax(const int n, const float* x, const int incx) {
|
75
|
-
return cblas_isamax(n, x, incx);
|
76
|
-
}
|
77
|
-
|
78
|
-
template<>
|
79
|
-
inline int imax(const int n, const double* x, const int incx) {
|
80
|
-
return cblas_idamax(n, x, incx);
|
81
|
-
}
|
82
|
-
|
83
|
-
template<>
|
84
|
-
inline int imax(const int n, const Complex64* x, const int incx) {
|
85
|
-
return cblas_icamax(n, x, incx);
|
86
|
-
}
|
87
|
-
|
88
|
-
template <>
|
89
|
-
inline int imax(const int n, const Complex128* x, const int incx) {
|
90
|
-
return cblas_izamax(n, x, incx);
|
91
|
-
}
|
92
|
-
#endif
|
93
|
-
|
94
72
|
template<typename DType>
|
95
73
|
inline int cblas_imax(const int n, const void* x, const int incx) {
|
96
74
|
return imax<DType>(n, reinterpret_cast<const DType*>(x), incx);
|
@@ -42,9 +42,6 @@ namespace nm { namespace math {
|
|
42
42
|
template <> struct LongDType<double> { typedef double type; };
|
43
43
|
template <> struct LongDType<Complex64> { typedef Complex128 type; };
|
44
44
|
template <> struct LongDType<Complex128> { typedef Complex128 type; };
|
45
|
-
template <> struct LongDType<Rational32> { typedef Rational128 type; };
|
46
|
-
template <> struct LongDType<Rational64> { typedef Rational128 type; };
|
47
|
-
template <> struct LongDType<Rational128> { typedef Rational128 type; };
|
48
45
|
template <> struct LongDType<RubyObject> { typedef RubyObject type; };
|
49
46
|
|
50
47
|
}} // end of namespace nm::math
|
data/ext/nmatrix/math/math.h
CHANGED
@@ -68,19 +68,7 @@
|
|
68
68
|
* Standard Includes
|
69
69
|
*/
|
70
70
|
|
71
|
-
|
72
|
-
#if defined HAVE_CBLAS_H
|
73
|
-
#include <cblas.h>
|
74
|
-
#elif defined HAVE_ATLAS_CBLAS_H
|
75
|
-
#include <atlas/cblas.h>
|
76
|
-
#endif
|
77
|
-
|
78
|
-
#if defined HAVE_CLAPACK_H
|
79
|
-
#include <clapack.h>
|
80
|
-
#elif defined HAVE_ATLAS_CLAPACK_H
|
81
|
-
#include <atlas/clapack.h>
|
82
|
-
#endif
|
83
|
-
}
|
71
|
+
#include "cblas_enums.h"
|
84
72
|
|
85
73
|
#include <algorithm> // std::min, std::max
|
86
74
|
#include <limits> // std::numeric_limits
|
@@ -103,11 +91,18 @@ extern "C" {
|
|
103
91
|
/*
|
104
92
|
* C accessors.
|
105
93
|
*/
|
106
|
-
|
107
|
-
void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
|
108
|
-
void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
|
94
|
+
|
109
95
|
void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size);
|
110
96
|
void nm_math_init_blas(void);
|
97
|
+
|
98
|
+
/*
|
99
|
+
* Pure math implementations.
|
100
|
+
*/
|
101
|
+
void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv);
|
102
|
+
void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
|
103
|
+
void nm_math_hessenberg(VALUE a);
|
104
|
+
void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
|
105
|
+
void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
|
111
106
|
}
|
112
107
|
|
113
108
|
|
@@ -123,94 +118,6 @@ namespace nm {
|
|
123
118
|
* Functions
|
124
119
|
*/
|
125
120
|
|
126
|
-
|
127
|
-
template <typename DType>
|
128
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
129
|
-
const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) {
|
130
|
-
rb_raise(rb_eNotImpError, "syrk not yet implemented for non-BLAS dtypes");
|
131
|
-
}
|
132
|
-
|
133
|
-
template <typename DType>
|
134
|
-
inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
135
|
-
const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) {
|
136
|
-
rb_raise(rb_eNotImpError, "herk not yet implemented for non-BLAS dtypes");
|
137
|
-
}
|
138
|
-
|
139
|
-
template <>
|
140
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
141
|
-
const int K, const float* alpha, const float* A, const int lda, const float* beta, float* C, const int ldc) {
|
142
|
-
cblas_ssyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc);
|
143
|
-
}
|
144
|
-
|
145
|
-
template <>
|
146
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
147
|
-
const int K, const double* alpha, const double* A, const int lda, const double* beta, double* C, const int ldc) {
|
148
|
-
cblas_dsyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc);
|
149
|
-
}
|
150
|
-
|
151
|
-
template <>
|
152
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
153
|
-
const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) {
|
154
|
-
cblas_csyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
|
155
|
-
}
|
156
|
-
|
157
|
-
template <>
|
158
|
-
inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
159
|
-
const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) {
|
160
|
-
cblas_zsyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
|
161
|
-
}
|
162
|
-
|
163
|
-
|
164
|
-
template <>
|
165
|
-
inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
166
|
-
const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) {
|
167
|
-
cblas_cherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc);
|
168
|
-
}
|
169
|
-
|
170
|
-
template <>
|
171
|
-
inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N,
|
172
|
-
const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) {
|
173
|
-
cblas_zherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc);
|
174
|
-
}
|
175
|
-
|
176
|
-
|
177
|
-
template <typename DType>
|
178
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
179
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const DType* alpha,
|
180
|
-
const DType* A, const int lda, DType* B, const int ldb) {
|
181
|
-
rb_raise(rb_eNotImpError, "trmm not yet implemented for non-BLAS dtypes");
|
182
|
-
}
|
183
|
-
|
184
|
-
template <>
|
185
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
186
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const float* alpha,
|
187
|
-
const float* A, const int lda, float* B, const int ldb) {
|
188
|
-
cblas_strmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb);
|
189
|
-
}
|
190
|
-
|
191
|
-
template <>
|
192
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
193
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const double* alpha,
|
194
|
-
const double* A, const int lda, double* B, const int ldb) {
|
195
|
-
cblas_dtrmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb);
|
196
|
-
}
|
197
|
-
|
198
|
-
template <>
|
199
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
200
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex64* alpha,
|
201
|
-
const Complex64* A, const int lda, Complex64* B, const int ldb) {
|
202
|
-
cblas_ctrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb);
|
203
|
-
}
|
204
|
-
|
205
|
-
template <>
|
206
|
-
inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
207
|
-
const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex128* alpha,
|
208
|
-
const Complex128* A, const int lda, Complex128* B, const int ldb) {
|
209
|
-
cblas_ztrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb);
|
210
|
-
}
|
211
|
-
|
212
|
-
|
213
|
-
|
214
121
|
// Yale: numeric matrix multiply c=a*b
|
215
122
|
template <typename DType>
|
216
123
|
inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
|
@@ -502,74 +409,6 @@ inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType*
|
|
502
409
|
}
|
503
410
|
|
504
411
|
|
505
|
-
/*
|
506
|
-
* From ATLAS 3.8.0:
|
507
|
-
*
|
508
|
-
* Computes one of two LU factorizations based on the setting of the Order
|
509
|
-
* parameter, as follows:
|
510
|
-
* ----------------------------------------------------------------------------
|
511
|
-
* Order == CblasColMajor
|
512
|
-
* Column-major factorization of form
|
513
|
-
* A = P * L * U
|
514
|
-
* where P is a row-permutation matrix, L is lower triangular with unit
|
515
|
-
* diagonal elements (lower trapazoidal if M > N), and U is upper triangular
|
516
|
-
* (upper trapazoidal if M < N).
|
517
|
-
*
|
518
|
-
* ----------------------------------------------------------------------------
|
519
|
-
* Order == CblasRowMajor
|
520
|
-
* Row-major factorization of form
|
521
|
-
* A = P * L * U
|
522
|
-
* where P is a column-permutation matrix, L is lower triangular (lower
|
523
|
-
* trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
|
524
|
-
* trapazoidal if M < N).
|
525
|
-
*
|
526
|
-
* ============================================================================
|
527
|
-
* Let IERR be the return value of the function:
|
528
|
-
* If IERR == 0, successful exit.
|
529
|
-
* If (IERR < 0) the -IERR argument had an illegal value
|
530
|
-
* If (IERR > 0 && Order == CblasColMajor)
|
531
|
-
* U(i-1,i-1) is exactly zero. The factorization has been completed,
|
532
|
-
* but the factor U is exactly singular, and division by zero will
|
533
|
-
* occur if it is used to solve a system of equations.
|
534
|
-
* If (IERR > 0 && Order == CblasRowMajor)
|
535
|
-
* L(i-1,i-1) is exactly zero. The factorization has been completed,
|
536
|
-
* but the factor L is exactly singular, and division by zero will
|
537
|
-
* occur if it is used to solve a system of equations.
|
538
|
-
*/
|
539
|
-
template <typename DType>
|
540
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
|
541
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
542
|
-
rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes");
|
543
|
-
#else
|
544
|
-
rb_raise(rb_eNotImpError, "only CLAPACK version implemented thus far");
|
545
|
-
#endif
|
546
|
-
return 0;
|
547
|
-
}
|
548
|
-
|
549
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
550
|
-
template <>
|
551
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
|
552
|
-
return clapack_spotrf(order, uplo, N, A, lda);
|
553
|
-
}
|
554
|
-
|
555
|
-
template <>
|
556
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) {
|
557
|
-
return clapack_dpotrf(order, uplo, N, A, lda);
|
558
|
-
}
|
559
|
-
|
560
|
-
template <>
|
561
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) {
|
562
|
-
return clapack_cpotrf(order, uplo, N, reinterpret_cast<void*>(A), lda);
|
563
|
-
}
|
564
|
-
|
565
|
-
template <>
|
566
|
-
inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) {
|
567
|
-
return clapack_zpotrf(order, uplo, N, reinterpret_cast<void*>(A), lda);
|
568
|
-
}
|
569
|
-
#endif
|
570
|
-
|
571
|
-
|
572
|
-
|
573
412
|
// Copies an upper row-major array from U, zeroing U; U is unit, so diagonal is not copied.
|
574
413
|
//
|
575
414
|
// From ATLAS 3.8.0.
|
@@ -875,110 +714,6 @@ int getri(const int N, DType* A, const int lda, const int* ipiv, DType* wrk, con
|
|
875
714
|
}
|
876
715
|
*/
|
877
716
|
|
878
|
-
|
879
|
-
|
880
|
-
template <bool is_complex, typename DType>
|
881
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
|
882
|
-
|
883
|
-
int Nleft, Nright;
|
884
|
-
const DType ONE = 1;
|
885
|
-
DType *G, *U0 = A, *U1;
|
886
|
-
|
887
|
-
if (N > 1) {
|
888
|
-
Nleft = N >> 1;
|
889
|
-
#ifdef NB
|
890
|
-
if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
|
891
|
-
#endif
|
892
|
-
|
893
|
-
Nright = N - Nleft;
|
894
|
-
|
895
|
-
// FIXME: There's a simpler way to write this next block, but I'm way too tired to work it out right now.
|
896
|
-
if (uplo == CblasUpper) {
|
897
|
-
if (order == CblasRowMajor) {
|
898
|
-
G = A + Nleft;
|
899
|
-
U1 = G + Nleft * lda;
|
900
|
-
} else {
|
901
|
-
G = A + Nleft * lda;
|
902
|
-
U1 = G + Nleft;
|
903
|
-
}
|
904
|
-
} else {
|
905
|
-
if (order == CblasRowMajor) {
|
906
|
-
G = A + Nleft * lda;
|
907
|
-
U1 = G + Nleft;
|
908
|
-
} else {
|
909
|
-
G = A + Nleft;
|
910
|
-
U1 = G + Nleft * lda;
|
911
|
-
}
|
912
|
-
}
|
913
|
-
|
914
|
-
lauum<is_complex, DType>(order, uplo, Nleft, U0, lda);
|
915
|
-
|
916
|
-
if (is_complex) {
|
917
|
-
|
918
|
-
nm::math::herk<DType>(order, uplo,
|
919
|
-
uplo == CblasLower ? CblasConjTrans : CblasNoTrans,
|
920
|
-
Nleft, Nright, &ONE, G, lda, &ONE, U0, lda);
|
921
|
-
|
922
|
-
nm::math::trmm<DType>(order, CblasLeft, uplo, CblasConjTrans, CblasNonUnit, Nright, Nleft, &ONE, U1, lda, G, lda);
|
923
|
-
} else {
|
924
|
-
nm::math::syrk<DType>(order, uplo,
|
925
|
-
uplo == CblasLower ? CblasTrans : CblasNoTrans,
|
926
|
-
Nleft, Nright, &ONE, G, lda, &ONE, U0, lda);
|
927
|
-
|
928
|
-
nm::math::trmm<DType>(order, CblasLeft, uplo, CblasTrans, CblasNonUnit, Nright, Nleft, &ONE, U1, lda, G, lda);
|
929
|
-
}
|
930
|
-
lauum<is_complex, DType>(order, uplo, Nright, U1, lda);
|
931
|
-
|
932
|
-
} else {
|
933
|
-
*A = *A * *A;
|
934
|
-
}
|
935
|
-
}
|
936
|
-
|
937
|
-
|
938
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
939
|
-
template <bool is_complex>
|
940
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
|
941
|
-
clapack_slauum(order, uplo, N, A, lda);
|
942
|
-
}
|
943
|
-
|
944
|
-
template <bool is_complex>
|
945
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) {
|
946
|
-
clapack_dlauum(order, uplo, N, A, lda);
|
947
|
-
}
|
948
|
-
|
949
|
-
template <bool is_complex>
|
950
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) {
|
951
|
-
clapack_clauum(order, uplo, N, A, lda);
|
952
|
-
}
|
953
|
-
|
954
|
-
template <bool is_complex>
|
955
|
-
inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) {
|
956
|
-
clapack_zlauum(order, uplo, N, A, lda);
|
957
|
-
}
|
958
|
-
#endif
|
959
|
-
|
960
|
-
|
961
|
-
/*
|
962
|
-
* Function signature conversion for calling LAPACK's lauum functions as directly as possible.
|
963
|
-
*
|
964
|
-
* For documentation: http://www.netlib.org/lapack/double/dlauum.f
|
965
|
-
*
|
966
|
-
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
967
|
-
*/
|
968
|
-
template <bool is_complex, typename DType>
|
969
|
-
inline int clapack_lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
|
970
|
-
if (n < 0) rb_raise(rb_eArgError, "n cannot be less than zero, is set to %d", n);
|
971
|
-
if (lda < n || lda < 1) rb_raise(rb_eArgError, "lda must be >= max(n,1); lda=%d, n=%d\n", lda, n);
|
972
|
-
|
973
|
-
if (uplo == CblasUpper) lauum<is_complex, DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
974
|
-
else lauum<is_complex, DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
975
|
-
|
976
|
-
return 0;
|
977
|
-
}
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
717
|
/*
|
983
718
|
* Macro for declaring LAPACK specializations of the getrf function.
|
984
719
|
*
|
@@ -1003,67 +738,6 @@ LAPACK_GETRF(Complex64, clapack_cgetrf, void)
|
|
1003
738
|
LAPACK_GETRF(Complex128, clapack_zgetrf, void)
|
1004
739
|
*/
|
1005
740
|
|
1006
|
-
|
1007
|
-
|
1008
|
-
/*
|
1009
|
-
* Function signature conversion for calling LAPACK's potrf functions as directly as possible.
|
1010
|
-
*
|
1011
|
-
* For documentation: http://www.netlib.org/lapack/double/dpotrf.f
|
1012
|
-
*
|
1013
|
-
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
1014
|
-
*/
|
1015
|
-
template <typename DType>
|
1016
|
-
inline int clapack_potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
|
1017
|
-
return potrf<DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
1018
|
-
}
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
template <typename DType>
|
1023
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, DType* a, const int lda) {
|
1024
|
-
rb_raise(rb_eNotImpError, "potri not yet implemented for non-BLAS dtypes");
|
1025
|
-
return 0;
|
1026
|
-
}
|
1027
|
-
|
1028
|
-
|
1029
|
-
#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
|
1030
|
-
template <>
|
1031
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, float* a, const int lda) {
|
1032
|
-
return clapack_spotri(order, uplo, n, a, lda);
|
1033
|
-
}
|
1034
|
-
|
1035
|
-
template <>
|
1036
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, double* a, const int lda) {
|
1037
|
-
return clapack_dpotri(order, uplo, n, a, lda);
|
1038
|
-
}
|
1039
|
-
|
1040
|
-
template <>
|
1041
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex64* a, const int lda) {
|
1042
|
-
return clapack_cpotri(order, uplo, n, reinterpret_cast<void*>(a), lda);
|
1043
|
-
}
|
1044
|
-
|
1045
|
-
template <>
|
1046
|
-
inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex128* a, const int lda) {
|
1047
|
-
return clapack_zpotri(order, uplo, n, reinterpret_cast<void*>(a), lda);
|
1048
|
-
}
|
1049
|
-
#endif
|
1050
|
-
|
1051
|
-
|
1052
|
-
/*
|
1053
|
-
* Function signature conversion for calling LAPACK's potri functions as directly as possible.
|
1054
|
-
*
|
1055
|
-
* For documentation: http://www.netlib.org/lapack/double/dpotri.f
|
1056
|
-
*
|
1057
|
-
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
1058
|
-
*/
|
1059
|
-
template <typename DType>
|
1060
|
-
inline int clapack_potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) {
|
1061
|
-
return potri<DType>(order, uplo, n, reinterpret_cast<DType*>(a), lda);
|
1062
|
-
}
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
741
|
}} // end namespace nm::math
|
1068
742
|
|
1069
743
|
|