nmatrix 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/data.cpp +9 -9
- data/ext/nmatrix/data/data.h +7 -8
- data/ext/nmatrix/data/ruby_object.h +1 -4
- data/ext/nmatrix/extconf.rb +9 -127
- data/ext/nmatrix/math.cpp +25 -25
- data/ext/nmatrix/math/asum.h +10 -31
- data/ext/nmatrix/math/cblas_templates_core.h +10 -10
- data/ext/nmatrix/math/getrf.h +2 -2
- data/ext/nmatrix/math/imax.h +12 -9
- data/ext/nmatrix/math/laswp.h +3 -3
- data/ext/nmatrix/math/long_dtype.h +16 -3
- data/ext/nmatrix/math/magnitude.h +54 -0
- data/ext/nmatrix/math/nrm2.h +19 -14
- data/ext/nmatrix/math/trsm.h +40 -36
- data/ext/nmatrix/math/util.h +14 -0
- data/ext/nmatrix/nmatrix.h +39 -1
- data/ext/nmatrix/ruby_nmatrix.c +45 -83
- data/ext/nmatrix/storage/common.h +9 -3
- data/ext/nmatrix/storage/dense/dense.cpp +4 -4
- data/ext/nmatrix/storage/list/list.cpp +2 -2
- data/ext/nmatrix/storage/yale/class.h +1 -1
- data/lib/nmatrix/blas.rb +103 -34
- data/lib/nmatrix/io/fortran_format.rb +8 -5
- data/lib/nmatrix/io/harwell_boeing.rb +11 -10
- data/lib/nmatrix/io/market.rb +9 -6
- data/lib/nmatrix/io/mat5_reader.rb +54 -29
- data/lib/nmatrix/io/mat_reader.rb +26 -14
- data/lib/nmatrix/io/point_cloud.rb +19 -11
- data/lib/nmatrix/math.rb +224 -5
- data/lib/nmatrix/mkmf.rb +103 -0
- data/lib/nmatrix/nmatrix.rb +20 -6
- data/lib/nmatrix/shortcuts.rb +415 -0
- data/lib/nmatrix/version.rb +1 -1
- data/spec/00_nmatrix_spec.rb +50 -1
- data/spec/02_slice_spec.rb +21 -21
- data/spec/blas_spec.rb +25 -3
- data/spec/math_spec.rb +233 -5
- data/spec/shortcuts_spec.rb +145 -5
- data/spec/spec_helper.rb +24 -1
- metadata +20 -4
data/ext/nmatrix/math/getrf.h
CHANGED
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c) 2012 -
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
data/ext/nmatrix/math/imax.h
CHANGED
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c) 2012 -
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -29,8 +29,11 @@
|
|
29
29
|
#ifndef IMAX_H
|
30
30
|
#define IMAX_H
|
31
31
|
|
32
|
+
#include "math/magnitude.h"
|
33
|
+
|
32
34
|
namespace nm { namespace math {
|
33
35
|
|
36
|
+
|
34
37
|
template<typename DType>
|
35
38
|
inline int imax(const int n, const DType *x, const int incx) {
|
36
39
|
|
@@ -41,28 +44,28 @@ inline int imax(const int n, const DType *x, const int incx) {
|
|
41
44
|
return 0;
|
42
45
|
}
|
43
46
|
|
44
|
-
DType dmax;
|
47
|
+
typename MagnitudeDType<DType>::type dmax;
|
45
48
|
int imax = 0;
|
46
49
|
|
47
50
|
if (incx == 1) { // if incrementing by 1
|
48
51
|
|
49
|
-
dmax =
|
52
|
+
dmax = magnitude(x[0]);
|
50
53
|
|
51
54
|
for (int i = 1; i < n; ++i) {
|
52
|
-
if (
|
55
|
+
if (magnitude(x[i]) > dmax) {
|
53
56
|
imax = i;
|
54
|
-
dmax =
|
57
|
+
dmax = magnitude(x[i]);
|
55
58
|
}
|
56
59
|
}
|
57
60
|
|
58
61
|
} else { // if incrementing by more than 1
|
59
62
|
|
60
|
-
dmax =
|
63
|
+
dmax = magnitude(x[0]);
|
61
64
|
|
62
65
|
for (int i = 1, ix = incx; i < n; ++i, ix += incx) {
|
63
|
-
if (
|
66
|
+
if (magnitude(x[ix]) > dmax) {
|
64
67
|
imax = i;
|
65
|
-
dmax =
|
68
|
+
dmax = magnitude(x[ix]);
|
66
69
|
}
|
67
70
|
}
|
68
71
|
}
|
data/ext/nmatrix/math/laswp.h
CHANGED
@@ -102,7 +102,7 @@ inline void laswp(const int N, DType* A, const int lda, const int K1, const int
|
|
102
102
|
DType *a0 = &(A[i]),
|
103
103
|
*a1 = &(A[ip]);
|
104
104
|
|
105
|
-
for (
|
105
|
+
for (int h = 32; h; h--) {
|
106
106
|
DType r = *a0;
|
107
107
|
*a0 = *a1;
|
108
108
|
*a1 = r;
|
@@ -131,7 +131,7 @@ inline void laswp(const int N, DType* A, const int lda, const int K1, const int
|
|
131
131
|
DType *a0 = &(A[i]),
|
132
132
|
*a1 = &(A[ip]);
|
133
133
|
|
134
|
-
for (
|
134
|
+
for (int h = mr; h; h--) {
|
135
135
|
DType r = *a0;
|
136
136
|
*a0 = *a1;
|
137
137
|
*a1 = r;
|
@@ -162,4 +162,4 @@ inline void clapack_laswp(const int n, void* a, const int lda, const int k1, con
|
|
162
162
|
}
|
163
163
|
|
164
164
|
} } // namespace nm::math
|
165
|
-
#endif // LASWP_H
|
165
|
+
#endif // LASWP_H
|
data/ext/nmatrix/math/long_dtype.h
CHANGED
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c) 2012 -
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -23,7 +23,8 @@
|
|
23
23
|
//
|
24
24
|
// == long_dtype.h
|
25
25
|
//
|
26
|
-
// Declarations necessary for the native versions of GEMM and GEMV
|
26
|
+
// Declarations necessary for the native versions of GEMM and GEMV,
|
27
|
+
// as well as for IMAX.
|
27
28
|
//
|
28
29
|
|
29
30
|
#ifndef LONG_DTYPE_H
|
@@ -44,6 +45,18 @@ namespace nm { namespace math {
|
|
44
45
|
template <> struct LongDType<Complex128> { typedef Complex128 type; };
|
45
46
|
template <> struct LongDType<RubyObject> { typedef RubyObject type; };
|
46
47
|
|
48
|
+
template <typename DType> struct MagnitudeDType;
|
49
|
+
template <> struct MagnitudeDType<uint8_t> { typedef uint8_t type; };
|
50
|
+
template <> struct MagnitudeDType<int8_t> { typedef int8_t type; };
|
51
|
+
template <> struct MagnitudeDType<int16_t> { typedef int16_t type; };
|
52
|
+
template <> struct MagnitudeDType<int32_t> { typedef int32_t type; };
|
53
|
+
template <> struct MagnitudeDType<int64_t> { typedef int64_t type; };
|
54
|
+
template <> struct MagnitudeDType<float> { typedef float type; };
|
55
|
+
template <> struct MagnitudeDType<double> { typedef double type; };
|
56
|
+
template <> struct MagnitudeDType<Complex64> { typedef float type; };
|
57
|
+
template <> struct MagnitudeDType<Complex128> { typedef double type; };
|
58
|
+
template <> struct MagnitudeDType<RubyObject> { typedef RubyObject type; };
|
59
|
+
|
47
60
|
}} // end of namespace nm::math
|
48
61
|
|
49
62
|
#endif
|
data/ext/nmatrix/math/magnitude.h
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == math/magnitude.h
|
25
|
+
//
|
26
|
+
// Takes the absolute value (meaning magnitude) of each DType.
|
27
|
+
// Needed for a variety of BLAS/LAPACK functions.
|
28
|
+
//
|
29
|
+
|
30
|
+
#ifndef MAGNITUDE_H
|
31
|
+
#define MAGNITUDE_H
|
32
|
+
|
33
|
+
#include "math/long_dtype.h"
|
34
|
+
|
35
|
+
namespace nm { namespace math {
|
36
|
+
|
37
|
+
/* Magnitude -- may be complicated for unsigned types, and need to call the correct STL abs for floats/doubles */
|
38
|
+
template <typename DType, typename MDType = typename MagnitudeDType<DType>::type>
|
39
|
+
inline MDType magnitude(const DType& v) {
|
40
|
+
return v.abs();
|
41
|
+
}
|
42
|
+
template <> inline float magnitude(const float& v) { return std::abs(v); }
|
43
|
+
template <> inline double magnitude(const double& v) { return std::abs(v); }
|
44
|
+
template <> inline uint8_t magnitude(const uint8_t& v) { return v; }
|
45
|
+
template <> inline int8_t magnitude(const int8_t& v) { return std::abs(v); }
|
46
|
+
template <> inline int16_t magnitude(const int16_t& v) { return std::abs(v); }
|
47
|
+
template <> inline int32_t magnitude(const int32_t& v) { return std::abs(v); }
|
48
|
+
template <> inline int64_t magnitude(const int64_t& v) { return std::abs(v); }
|
49
|
+
template <> inline float magnitude(const nm::Complex64& v) { return std::sqrt(v.r * v.r + v.i * v.i); }
|
50
|
+
template <> inline double magnitude(const nm::Complex128& v) { return std::sqrt(v.r * v.r + v.i * v.i); }
|
51
|
+
|
52
|
+
}}
|
53
|
+
|
54
|
+
#endif // MAGNITUDE_H
|
data/ext/nmatrix/math/nrm2.h
CHANGED
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c) 2012 -
|
12
|
+
// SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -74,8 +74,8 @@ namespace nm { namespace math {
|
|
74
74
|
* complex64 -> float or double
|
75
75
|
* complex128 -> double
|
76
76
|
*/
|
77
|
-
template <typename
|
78
|
-
|
77
|
+
template <typename DType, typename MDType = typename MagnitudeDType<DType>::type>
|
78
|
+
MDType nrm2(const int N, const DType* X, const int incX) {
|
79
79
|
const DType ONE = 1, ZERO = 0;
|
80
80
|
typename LongDType<DType>::type scale = 0, ssq = 1, absxi, temp;
|
81
81
|
|
@@ -89,13 +89,14 @@ ReturnDType nrm2(const int N, const DType* X, const int incX) {
|
|
89
89
|
temp = scale / absxi;
|
90
90
|
scale = absxi;
|
91
91
|
ssq = ONE + ssq * (temp * temp);
|
92
|
-
}
|
92
|
+
}
|
93
|
+
else if(scale != 0) {
|
93
94
|
temp = absxi / scale;
|
94
95
|
ssq += temp * temp;
|
95
96
|
}
|
96
97
|
}
|
97
98
|
|
98
|
-
return scale * std::sqrt( ssq );
|
99
|
+
return (MDType)(scale * std::sqrt( ssq ));
|
99
100
|
}
|
100
101
|
|
101
102
|
|
@@ -106,7 +107,8 @@ static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& x
|
|
106
107
|
double temp = scale / absx;
|
107
108
|
scale = absx;
|
108
109
|
ssq = 1.0 + ssq * (temp * temp);
|
109
|
-
}
|
110
|
+
}
|
111
|
+
else if(scale != 0) {
|
110
112
|
double temp = absx / scale;
|
111
113
|
ssq += temp * temp;
|
112
114
|
}
|
@@ -116,7 +118,8 @@ static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& x
|
|
116
118
|
double temp = scale / absx;
|
117
119
|
scale = absx;
|
118
120
|
ssq = 1.0 + ssq * (temp * temp);
|
119
|
-
}
|
121
|
+
}
|
122
|
+
else if(scale != 0) {
|
120
123
|
double temp = absx / scale;
|
121
124
|
ssq += temp * temp;
|
122
125
|
}
|
@@ -124,33 +127,35 @@ static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& x
|
|
124
127
|
|
125
128
|
template <>
|
126
129
|
float nrm2(const int N, const Complex64* X, const int incX) {
|
127
|
-
double scale = 0, ssq = 1
|
130
|
+
double scale = 0, ssq = 1;
|
128
131
|
|
129
132
|
if ((N < 1) || (incX < 1)) return 0.0;
|
130
133
|
|
131
134
|
for (int i = 0; i < N; ++i) {
|
132
|
-
nrm2_complex_helper<float>(X[i*incX].r, X[i*incX].i, scale,
|
135
|
+
nrm2_complex_helper<float>(X[i*incX].r, X[i*incX].i, scale, ssq);
|
133
136
|
}
|
134
137
|
|
135
138
|
return scale * std::sqrt( ssq );
|
136
139
|
}
|
137
140
|
|
141
|
+
// FIXME: Function above is duplicated here, should be writeable as a template using
|
142
|
+
// FIXME: xMagnitudeDType.
|
138
143
|
template <>
|
139
144
|
double nrm2(const int N, const Complex128* X, const int incX) {
|
140
|
-
double scale = 0, ssq = 1
|
145
|
+
double scale = 0, ssq = 1;
|
141
146
|
|
142
147
|
if ((N < 1) || (incX < 1)) return 0.0;
|
143
148
|
|
144
149
|
for (int i = 0; i < N; ++i) {
|
145
|
-
nrm2_complex_helper<double>(X[i*incX].r, X[i*incX].i, scale,
|
150
|
+
nrm2_complex_helper<double>(X[i*incX].r, X[i*incX].i, scale, ssq);
|
146
151
|
}
|
147
152
|
|
148
153
|
return scale * std::sqrt( ssq );
|
149
154
|
}
|
150
155
|
|
151
|
-
template <typename
|
156
|
+
template <typename DType, typename MDType = typename MagnitudeDType<DType>::type>
|
152
157
|
inline void cblas_nrm2(const int N, const void* X, const int incX, void* result) {
|
153
|
-
*reinterpret_cast<
|
158
|
+
*reinterpret_cast<MDType*>( result ) = nrm2<DType, MDType>( N, reinterpret_cast<const DType*>(X), incX );
|
154
159
|
}
|
155
160
|
|
156
161
|
|
data/ext/nmatrix/math/trsm.h
CHANGED
@@ -81,10 +81,14 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
81
81
|
// (row-major) trsm: left upper trans nonunit m=3 n=1 1/1 a 3 b 3
|
82
82
|
|
83
83
|
if (m == 0 || n == 0) return; /* Quick return if possible. */
|
84
|
+
|
85
|
+
// Apply necessary offset
|
86
|
+
a -= 1 + lda;
|
87
|
+
b -= 1 + ldb;
|
84
88
|
|
85
89
|
if (alpha == 0) { // Handle alpha == 0
|
86
|
-
for (int j =
|
87
|
-
for (int i =
|
90
|
+
for (int j = 1; j <= n; ++j) {
|
91
|
+
for (int i = 1; i <= m; ++i) {
|
88
92
|
b[i + j * ldb] = 0;
|
89
93
|
}
|
90
94
|
}
|
@@ -96,37 +100,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
96
100
|
|
97
101
|
/* Form B := alpha*inv( A )*B. */
|
98
102
|
if (uplo == CblasUpper) {
|
99
|
-
for (int j =
|
103
|
+
for (int j = 1; j <= n; ++j) {
|
100
104
|
if (alpha != 1) {
|
101
|
-
for (int i =
|
105
|
+
for (int i = 1; i <= m; ++i) {
|
102
106
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
103
107
|
}
|
104
108
|
}
|
105
|
-
for (int k = m
|
109
|
+
for (int k = m; k >= 1; --k) {
|
106
110
|
if (b[k + j * ldb] != 0) {
|
107
111
|
if (diag == CblasNonUnit) {
|
108
112
|
b[k + j * ldb] /= a[k + k * lda];
|
109
113
|
}
|
110
114
|
|
111
|
-
for (int i =
|
115
|
+
for (int i = 1; i <= k-1; ++i) {
|
112
116
|
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
113
117
|
}
|
114
118
|
}
|
115
119
|
}
|
116
120
|
}
|
117
121
|
} else {
|
118
|
-
for (int j =
|
122
|
+
for (int j = 1; j <= n; ++j) {
|
119
123
|
if (alpha != 1) {
|
120
|
-
for (int i =
|
124
|
+
for (int i = 1; i <= m; ++i) {
|
121
125
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
122
126
|
}
|
123
127
|
}
|
124
|
-
for (int k =
|
128
|
+
for (int k = 1; k <= m; ++k) {
|
125
129
|
if (b[k + j * ldb] != 0.) {
|
126
130
|
if (diag == CblasNonUnit) {
|
127
131
|
b[k + j * ldb] /= a[k + k * lda];
|
128
132
|
}
|
129
|
-
for (int i = k+1; i
|
133
|
+
for (int i = k+1; i <= m; ++i) {
|
130
134
|
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
131
135
|
}
|
132
136
|
}
|
@@ -137,10 +141,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
137
141
|
|
138
142
|
/* Form B := alpha*inv( A**T )*B. */
|
139
143
|
if (uplo == CblasUpper) {
|
140
|
-
for (int j =
|
141
|
-
for (int i =
|
144
|
+
for (int j = 1; j <= n; ++j) {
|
145
|
+
for (int i = 1; i <= m; ++i) {
|
142
146
|
DType temp = alpha * b[i + j * ldb];
|
143
|
-
for (int k =
|
147
|
+
for (int k = 1; k <= i-1; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
|
144
148
|
temp -= a[k + i * lda] * b[k + j * ldb];
|
145
149
|
}
|
146
150
|
if (diag == CblasNonUnit) {
|
@@ -150,10 +154,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
150
154
|
}
|
151
155
|
}
|
152
156
|
} else {
|
153
|
-
for (int j =
|
154
|
-
for (int i = m
|
157
|
+
for (int j = 1; j <= n; ++j) {
|
158
|
+
for (int i = m; i >= 1; --i) {
|
155
159
|
DType temp= alpha * b[i + j * ldb];
|
156
|
-
for (int k = i+1; k
|
160
|
+
for (int k = i+1; k <= m; ++k) {
|
157
161
|
temp -= a[k + i * lda] * b[k + j * ldb];
|
158
162
|
}
|
159
163
|
if (diag == CblasNonUnit) {
|
@@ -171,37 +175,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
171
175
|
/* Form B := alpha*B*inv( A ). */
|
172
176
|
|
173
177
|
if (uplo == CblasUpper) {
|
174
|
-
for (int j =
|
178
|
+
for (int j = 1; j <= n; ++j) {
|
175
179
|
if (alpha != 1) {
|
176
|
-
for (int i =
|
180
|
+
for (int i = 1; i <= m; ++i) {
|
177
181
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
178
182
|
}
|
179
183
|
}
|
180
|
-
for (int k =
|
184
|
+
for (int k = 1; k <= j-1; ++k) {
|
181
185
|
if (a[k + j * lda] != 0) {
|
182
|
-
for (int i =
|
186
|
+
for (int i = 1; i <= m; ++i) {
|
183
187
|
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
184
188
|
}
|
185
189
|
}
|
186
190
|
}
|
187
191
|
if (diag == CblasNonUnit) {
|
188
192
|
DType temp = 1 / a[j + j * lda];
|
189
|
-
for (int i =
|
193
|
+
for (int i = 1; i <= m; ++i) {
|
190
194
|
b[i + j * ldb] = temp * b[i + j * ldb];
|
191
195
|
}
|
192
196
|
}
|
193
197
|
}
|
194
198
|
} else {
|
195
|
-
for (int j = n
|
199
|
+
for (int j = n; j >= 1; --j) {
|
196
200
|
if (alpha != 1) {
|
197
|
-
for (int i =
|
201
|
+
for (int i = 1; i <= m; ++i) {
|
198
202
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
199
203
|
}
|
200
204
|
}
|
201
205
|
|
202
|
-
for (int k = j+1; k
|
206
|
+
for (int k = j+1; k <= n; ++k) {
|
203
207
|
if (a[k + j * lda] != 0.) {
|
204
|
-
for (int i =
|
208
|
+
for (int i = 1; i <= m; ++i) {
|
205
209
|
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
206
210
|
}
|
207
211
|
}
|
@@ -209,7 +213,7 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
209
213
|
if (diag == CblasNonUnit) {
|
210
214
|
DType temp = 1 / a[j + j * lda];
|
211
215
|
|
212
|
-
for (int i =
|
216
|
+
for (int i = 1; i <= m; ++i) {
|
213
217
|
b[i + j * ldb] = temp * b[i + j * ldb];
|
214
218
|
}
|
215
219
|
}
|
@@ -220,45 +224,45 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
220
224
|
/* Form B := alpha*B*inv( A**T ). */
|
221
225
|
|
222
226
|
if (uplo == CblasUpper) {
|
223
|
-
for (int k = n
|
227
|
+
for (int k = n; k >= 1; --k) {
|
224
228
|
if (diag == CblasNonUnit) {
|
225
229
|
DType temp= 1 / a[k + k * lda];
|
226
|
-
for (int i =
|
230
|
+
for (int i = 1; i <= m; ++i) {
|
227
231
|
b[i + k * ldb] = temp * b[i + k * ldb];
|
228
232
|
}
|
229
233
|
}
|
230
|
-
for (int j =
|
234
|
+
for (int j = 1; j <= k-1; ++j) {
|
231
235
|
if (a[j + k * lda] != 0.) {
|
232
236
|
DType temp= a[j + k * lda];
|
233
|
-
for (int i =
|
237
|
+
for (int i = 1; i <= m; ++i) {
|
234
238
|
b[i + j * ldb] -= temp * b[i + k * ldb];
|
235
239
|
}
|
236
240
|
}
|
237
241
|
}
|
238
242
|
if (alpha != 1) {
|
239
|
-
for (int i =
|
243
|
+
for (int i = 1; i <= m; ++i) {
|
240
244
|
b[i + k * ldb] = alpha * b[i + k * ldb];
|
241
245
|
}
|
242
246
|
}
|
243
247
|
}
|
244
248
|
} else {
|
245
|
-
for (int k =
|
249
|
+
for (int k = 1; k <= n; ++k) {
|
246
250
|
if (diag == CblasNonUnit) {
|
247
251
|
DType temp = 1 / a[k + k * lda];
|
248
|
-
for (int i =
|
252
|
+
for (int i = 1; i <= m; ++i) {
|
249
253
|
b[i + k * ldb] = temp * b[i + k * ldb];
|
250
254
|
}
|
251
255
|
}
|
252
|
-
for (int j = k+1; j
|
256
|
+
for (int j = k+1; j <= n; ++j) {
|
253
257
|
if (a[j + k * lda] != 0.) {
|
254
258
|
DType temp = a[j + k * lda];
|
255
|
-
for (int i =
|
259
|
+
for (int i = 1; i <= m; ++i) {
|
256
260
|
b[i + j * ldb] -= temp * b[i + k * ldb];
|
257
261
|
}
|
258
262
|
}
|
259
263
|
}
|
260
264
|
if (alpha != 1) {
|
261
|
-
for (int i =
|
265
|
+
for (int i = 1; i <= m; ++i) {
|
262
266
|
b[i + k * ldb] = alpha * b[i + k * ldb];
|
263
267
|
}
|
264
268
|
}
|