nmatrix 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/ext/nmatrix/data/data.cpp +9 -9
  3. data/ext/nmatrix/data/data.h +7 -8
  4. data/ext/nmatrix/data/ruby_object.h +1 -4
  5. data/ext/nmatrix/extconf.rb +9 -127
  6. data/ext/nmatrix/math.cpp +25 -25
  7. data/ext/nmatrix/math/asum.h +10 -31
  8. data/ext/nmatrix/math/cblas_templates_core.h +10 -10
  9. data/ext/nmatrix/math/getrf.h +2 -2
  10. data/ext/nmatrix/math/imax.h +12 -9
  11. data/ext/nmatrix/math/laswp.h +3 -3
  12. data/ext/nmatrix/math/long_dtype.h +16 -3
  13. data/ext/nmatrix/math/magnitude.h +54 -0
  14. data/ext/nmatrix/math/nrm2.h +19 -14
  15. data/ext/nmatrix/math/trsm.h +40 -36
  16. data/ext/nmatrix/math/util.h +14 -0
  17. data/ext/nmatrix/nmatrix.h +39 -1
  18. data/ext/nmatrix/ruby_nmatrix.c +45 -83
  19. data/ext/nmatrix/storage/common.h +9 -3
  20. data/ext/nmatrix/storage/dense/dense.cpp +4 -4
  21. data/ext/nmatrix/storage/list/list.cpp +2 -2
  22. data/ext/nmatrix/storage/yale/class.h +1 -1
  23. data/lib/nmatrix/blas.rb +103 -34
  24. data/lib/nmatrix/io/fortran_format.rb +8 -5
  25. data/lib/nmatrix/io/harwell_boeing.rb +11 -10
  26. data/lib/nmatrix/io/market.rb +9 -6
  27. data/lib/nmatrix/io/mat5_reader.rb +54 -29
  28. data/lib/nmatrix/io/mat_reader.rb +26 -14
  29. data/lib/nmatrix/io/point_cloud.rb +19 -11
  30. data/lib/nmatrix/math.rb +224 -5
  31. data/lib/nmatrix/mkmf.rb +103 -0
  32. data/lib/nmatrix/nmatrix.rb +20 -6
  33. data/lib/nmatrix/shortcuts.rb +415 -0
  34. data/lib/nmatrix/version.rb +1 -1
  35. data/spec/00_nmatrix_spec.rb +50 -1
  36. data/spec/02_slice_spec.rb +21 -21
  37. data/spec/blas_spec.rb +25 -3
  38. data/spec/math_spec.rb +233 -5
  39. data/spec/shortcuts_spec.rb +145 -5
  40. data/spec/spec_helper.rb +24 -1
  41. metadata +20 -4
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -29,8 +29,11 @@
29
29
  #ifndef IMAX_H
30
30
  #define IMAX_H
31
31
 
32
+ #include "math/magnitude.h"
33
+
32
34
  namespace nm { namespace math {
33
35
 
36
+
34
37
  template<typename DType>
35
38
  inline int imax(const int n, const DType *x, const int incx) {
36
39
 
@@ -41,28 +44,28 @@ inline int imax(const int n, const DType *x, const int incx) {
41
44
  return 0;
42
45
  }
43
46
 
44
- DType dmax;
47
+ typename MagnitudeDType<DType>::type dmax;
45
48
  int imax = 0;
46
49
 
47
50
  if (incx == 1) { // if incrementing by 1
48
51
 
49
- dmax = abs(x[0]);
52
+ dmax = magnitude(x[0]);
50
53
 
51
54
  for (int i = 1; i < n; ++i) {
52
- if (std::abs(x[i]) > dmax) {
55
+ if (magnitude(x[i]) > dmax) {
53
56
  imax = i;
54
- dmax = std::abs(x[i]);
57
+ dmax = magnitude(x[i]);
55
58
  }
56
59
  }
57
60
 
58
61
  } else { // if incrementing by more than 1
59
62
 
60
- dmax = std::abs(x[0]);
63
+ dmax = magnitude(x[0]);
61
64
 
62
65
  for (int i = 1, ix = incx; i < n; ++i, ix += incx) {
63
- if (std::abs(x[ix]) > dmax) {
66
+ if (magnitude(x[ix]) > dmax) {
64
67
  imax = i;
65
- dmax = std::abs(x[ix]);
68
+ dmax = magnitude(x[ix]);
66
69
  }
67
70
  }
68
71
  }
@@ -102,7 +102,7 @@ inline void laswp(const int N, DType* A, const int lda, const int K1, const int
102
102
  DType *a0 = &(A[i]),
103
103
  *a1 = &(A[ip]);
104
104
 
105
- for (register int h = 32; h; h--) {
105
+ for (int h = 32; h; h--) {
106
106
  DType r = *a0;
107
107
  *a0 = *a1;
108
108
  *a1 = r;
@@ -131,7 +131,7 @@ inline void laswp(const int N, DType* A, const int lda, const int K1, const int
131
131
  DType *a0 = &(A[i]),
132
132
  *a1 = &(A[ip]);
133
133
 
134
- for (register int h = mr; h; h--) {
134
+ for (int h = mr; h; h--) {
135
135
  DType r = *a0;
136
136
  *a0 = *a1;
137
137
  *a1 = r;
@@ -162,4 +162,4 @@ inline void clapack_laswp(const int n, void* a, const int lda, const int k1, con
162
162
  }
163
163
 
164
164
  } } // namespace nm::math
165
- #endif // LASWP_H
165
+ #endif // LASWP_H
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -23,7 +23,8 @@
23
23
  //
24
24
  // == long_dtype.h
25
25
  //
26
- // Declarations necessary for the native versions of GEMM and GEMV.
26
+ // Declarations necessary for the native versions of GEMM and GEMV,
27
+ // as well as for IMAX.
27
28
  //
28
29
 
29
30
  #ifndef LONG_DTYPE_H
@@ -44,6 +45,18 @@ namespace nm { namespace math {
44
45
  template <> struct LongDType<Complex128> { typedef Complex128 type; };
45
46
  template <> struct LongDType<RubyObject> { typedef RubyObject type; };
46
47
 
48
+ template <typename DType> struct MagnitudeDType;
49
+ template <> struct MagnitudeDType<uint8_t> { typedef uint8_t type; };
50
+ template <> struct MagnitudeDType<int8_t> { typedef int8_t type; };
51
+ template <> struct MagnitudeDType<int16_t> { typedef int16_t type; };
52
+ template <> struct MagnitudeDType<int32_t> { typedef int32_t type; };
53
+ template <> struct MagnitudeDType<int64_t> { typedef int64_t type; };
54
+ template <> struct MagnitudeDType<float> { typedef float type; };
55
+ template <> struct MagnitudeDType<double> { typedef double type; };
56
+ template <> struct MagnitudeDType<Complex64> { typedef float type; };
57
+ template <> struct MagnitudeDType<Complex128> { typedef double type; };
58
+ template <> struct MagnitudeDType<RubyObject> { typedef RubyObject type; };
59
+
47
60
  }} // end of namespace nm::math
48
61
 
49
62
  #endif
@@ -0,0 +1,54 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == math/magnitude.h
25
+ //
26
+ // Takes the absolute value (meaning magnitude) of each DType.
27
+ // Needed for a variety of BLAS/LAPACK functions.
28
+ //
29
+
30
+ #ifndef MAGNITUDE_H
31
+ #define MAGNITUDE_H
32
+
33
+ #include "math/long_dtype.h"
34
+
35
+ namespace nm { namespace math {
36
+
37
+ /* Magnitude -- may be complicated for unsigned types, and need to call the correct STL abs for floats/doubles */
38
+ template <typename DType, typename MDType = typename MagnitudeDType<DType>::type>
39
+ inline MDType magnitude(const DType& v) {
40
+ return v.abs();
41
+ }
42
+ template <> inline float magnitude(const float& v) { return std::abs(v); }
43
+ template <> inline double magnitude(const double& v) { return std::abs(v); }
44
+ template <> inline uint8_t magnitude(const uint8_t& v) { return v; }
45
+ template <> inline int8_t magnitude(const int8_t& v) { return std::abs(v); }
46
+ template <> inline int16_t magnitude(const int16_t& v) { return std::abs(v); }
47
+ template <> inline int32_t magnitude(const int32_t& v) { return std::abs(v); }
48
+ template <> inline int64_t magnitude(const int64_t& v) { return std::abs(v); }
49
+ template <> inline float magnitude(const nm::Complex64& v) { return std::sqrt(v.r * v.r + v.i * v.i); }
50
+ template <> inline double magnitude(const nm::Complex128& v) { return std::sqrt(v.r * v.r + v.i * v.i); }
51
+
52
+ }}
53
+
54
+ #endif // MAGNITUDE_H
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -74,8 +74,8 @@ namespace nm { namespace math {
74
74
  * complex64 -> float or double
75
75
  * complex128 -> double
76
76
  */
77
- template <typename ReturnDType, typename DType>
78
- ReturnDType nrm2(const int N, const DType* X, const int incX) {
77
+ template <typename DType, typename MDType = typename MagnitudeDType<DType>::type>
78
+ MDType nrm2(const int N, const DType* X, const int incX) {
79
79
  const DType ONE = 1, ZERO = 0;
80
80
  typename LongDType<DType>::type scale = 0, ssq = 1, absxi, temp;
81
81
 
@@ -89,13 +89,14 @@ ReturnDType nrm2(const int N, const DType* X, const int incX) {
89
89
  temp = scale / absxi;
90
90
  scale = absxi;
91
91
  ssq = ONE + ssq * (temp * temp);
92
- } else {
92
+ }
93
+ else if(scale != 0) {
93
94
  temp = absxi / scale;
94
95
  ssq += temp * temp;
95
96
  }
96
97
  }
97
98
 
98
- return scale * std::sqrt( ssq );
99
+ return (MDType)(scale * std::sqrt( ssq ));
99
100
  }
100
101
 
101
102
 
@@ -106,7 +107,8 @@ static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& x
106
107
  double temp = scale / absx;
107
108
  scale = absx;
108
109
  ssq = 1.0 + ssq * (temp * temp);
109
- } else {
110
+ }
111
+ else if(scale != 0) {
110
112
  double temp = absx / scale;
111
113
  ssq += temp * temp;
112
114
  }
@@ -116,7 +118,8 @@ static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& x
116
118
  double temp = scale / absx;
117
119
  scale = absx;
118
120
  ssq = 1.0 + ssq * (temp * temp);
119
- } else {
121
+ }
122
+ else if(scale != 0) {
120
123
  double temp = absx / scale;
121
124
  ssq += temp * temp;
122
125
  }
@@ -124,33 +127,35 @@ static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& x
124
127
 
125
128
  template <>
126
129
  float nrm2(const int N, const Complex64* X, const int incX) {
127
- double scale = 0, ssq = 1, temp;
130
+ double scale = 0, ssq = 1;
128
131
 
129
132
  if ((N < 1) || (incX < 1)) return 0.0;
130
133
 
131
134
  for (int i = 0; i < N; ++i) {
132
- nrm2_complex_helper<float>(X[i*incX].r, X[i*incX].i, scale, temp);
135
+ nrm2_complex_helper<float>(X[i*incX].r, X[i*incX].i, scale, ssq);
133
136
  }
134
137
 
135
138
  return scale * std::sqrt( ssq );
136
139
  }
137
140
 
141
+ // FIXME: Function above is duplicated here, should be writeable as a template using
142
+ // FIXME: xMagnitudeDType.
138
143
  template <>
139
144
  double nrm2(const int N, const Complex128* X, const int incX) {
140
- double scale = 0, ssq = 1, temp;
145
+ double scale = 0, ssq = 1;
141
146
 
142
147
  if ((N < 1) || (incX < 1)) return 0.0;
143
148
 
144
149
  for (int i = 0; i < N; ++i) {
145
- nrm2_complex_helper<double>(X[i*incX].r, X[i*incX].i, scale, temp);
150
+ nrm2_complex_helper<double>(X[i*incX].r, X[i*incX].i, scale, ssq);
146
151
  }
147
152
 
148
153
  return scale * std::sqrt( ssq );
149
154
  }
150
155
 
151
- template <typename ReturnDType, typename DType>
156
+ template <typename DType, typename MDType = typename MagnitudeDType<DType>::type>
152
157
  inline void cblas_nrm2(const int N, const void* X, const int incX, void* result) {
153
- *reinterpret_cast<ReturnDType*>( result ) = nrm2<ReturnDType, DType>( N, reinterpret_cast<const DType*>(X), incX );
158
+ *reinterpret_cast<MDType*>( result ) = nrm2<DType, MDType>( N, reinterpret_cast<const DType*>(X), incX );
154
159
  }
155
160
 
156
161
 
@@ -81,10 +81,14 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
81
81
  // (row-major) trsm: left upper trans nonunit m=3 n=1 1/1 a 3 b 3
82
82
 
83
83
  if (m == 0 || n == 0) return; /* Quick return if possible. */
84
+
85
+ // Apply necessary offset
86
+ a -= 1 + lda;
87
+ b -= 1 + ldb;
84
88
 
85
89
  if (alpha == 0) { // Handle alpha == 0
86
- for (int j = 0; j < n; ++j) {
87
- for (int i = 0; i < m; ++i) {
90
+ for (int j = 1; j <= n; ++j) {
91
+ for (int i = 1; i <= m; ++i) {
88
92
  b[i + j * ldb] = 0;
89
93
  }
90
94
  }
@@ -96,37 +100,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
96
100
 
97
101
  /* Form B := alpha*inv( A )*B. */
98
102
  if (uplo == CblasUpper) {
99
- for (int j = 0; j < n; ++j) {
103
+ for (int j = 1; j <= n; ++j) {
100
104
  if (alpha != 1) {
101
- for (int i = 0; i < m; ++i) {
105
+ for (int i = 1; i <= m; ++i) {
102
106
  b[i + j * ldb] = alpha * b[i + j * ldb];
103
107
  }
104
108
  }
105
- for (int k = m-1; k >= 0; --k) {
109
+ for (int k = m; k >= 1; --k) {
106
110
  if (b[k + j * ldb] != 0) {
107
111
  if (diag == CblasNonUnit) {
108
112
  b[k + j * ldb] /= a[k + k * lda];
109
113
  }
110
114
 
111
- for (int i = 0; i < k-1; ++i) {
115
+ for (int i = 1; i <= k-1; ++i) {
112
116
  b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
113
117
  }
114
118
  }
115
119
  }
116
120
  }
117
121
  } else {
118
- for (int j = 0; j < n; ++j) {
122
+ for (int j = 1; j <= n; ++j) {
119
123
  if (alpha != 1) {
120
- for (int i = 0; i < m; ++i) {
124
+ for (int i = 1; i <= m; ++i) {
121
125
  b[i + j * ldb] = alpha * b[i + j * ldb];
122
126
  }
123
127
  }
124
- for (int k = 0; k < m; ++k) {
128
+ for (int k = 1; k <= m; ++k) {
125
129
  if (b[k + j * ldb] != 0.) {
126
130
  if (diag == CblasNonUnit) {
127
131
  b[k + j * ldb] /= a[k + k * lda];
128
132
  }
129
- for (int i = k+1; i < m; ++i) {
133
+ for (int i = k+1; i <= m; ++i) {
130
134
  b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
131
135
  }
132
136
  }
@@ -137,10 +141,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
137
141
 
138
142
  /* Form B := alpha*inv( A**T )*B. */
139
143
  if (uplo == CblasUpper) {
140
- for (int j = 0; j < n; ++j) {
141
- for (int i = 0; i < m; ++i) {
144
+ for (int j = 1; j <= n; ++j) {
145
+ for (int i = 1; i <= m; ++i) {
142
146
  DType temp = alpha * b[i + j * ldb];
143
- for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
147
+ for (int k = 1; k <= i-1; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
144
148
  temp -= a[k + i * lda] * b[k + j * ldb];
145
149
  }
146
150
  if (diag == CblasNonUnit) {
@@ -150,10 +154,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
150
154
  }
151
155
  }
152
156
  } else {
153
- for (int j = 0; j < n; ++j) {
154
- for (int i = m-1; i >= 0; --i) {
157
+ for (int j = 1; j <= n; ++j) {
158
+ for (int i = m; i >= 1; --i) {
155
159
  DType temp= alpha * b[i + j * ldb];
156
- for (int k = i+1; k < m; ++k) {
160
+ for (int k = i+1; k <= m; ++k) {
157
161
  temp -= a[k + i * lda] * b[k + j * ldb];
158
162
  }
159
163
  if (diag == CblasNonUnit) {
@@ -171,37 +175,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
171
175
  /* Form B := alpha*B*inv( A ). */
172
176
 
173
177
  if (uplo == CblasUpper) {
174
- for (int j = 0; j < n; ++j) {
178
+ for (int j = 1; j <= n; ++j) {
175
179
  if (alpha != 1) {
176
- for (int i = 0; i < m; ++i) {
180
+ for (int i = 1; i <= m; ++i) {
177
181
  b[i + j * ldb] = alpha * b[i + j * ldb];
178
182
  }
179
183
  }
180
- for (int k = 0; k < j-1; ++k) {
184
+ for (int k = 1; k <= j-1; ++k) {
181
185
  if (a[k + j * lda] != 0) {
182
- for (int i = 0; i < m; ++i) {
186
+ for (int i = 1; i <= m; ++i) {
183
187
  b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
184
188
  }
185
189
  }
186
190
  }
187
191
  if (diag == CblasNonUnit) {
188
192
  DType temp = 1 / a[j + j * lda];
189
- for (int i = 0; i < m; ++i) {
193
+ for (int i = 1; i <= m; ++i) {
190
194
  b[i + j * ldb] = temp * b[i + j * ldb];
191
195
  }
192
196
  }
193
197
  }
194
198
  } else {
195
- for (int j = n-1; j >= 0; --j) {
199
+ for (int j = n; j >= 1; --j) {
196
200
  if (alpha != 1) {
197
- for (int i = 0; i < m; ++i) {
201
+ for (int i = 1; i <= m; ++i) {
198
202
  b[i + j * ldb] = alpha * b[i + j * ldb];
199
203
  }
200
204
  }
201
205
 
202
- for (int k = j+1; k < n; ++k) {
206
+ for (int k = j+1; k <= n; ++k) {
203
207
  if (a[k + j * lda] != 0.) {
204
- for (int i = 0; i < m; ++i) {
208
+ for (int i = 1; i <= m; ++i) {
205
209
  b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
206
210
  }
207
211
  }
@@ -209,7 +213,7 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
209
213
  if (diag == CblasNonUnit) {
210
214
  DType temp = 1 / a[j + j * lda];
211
215
 
212
- for (int i = 0; i < m; ++i) {
216
+ for (int i = 1; i <= m; ++i) {
213
217
  b[i + j * ldb] = temp * b[i + j * ldb];
214
218
  }
215
219
  }
@@ -220,45 +224,45 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
220
224
  /* Form B := alpha*B*inv( A**T ). */
221
225
 
222
226
  if (uplo == CblasUpper) {
223
- for (int k = n-1; k >= 0; --k) {
227
+ for (int k = n; k >= 1; --k) {
224
228
  if (diag == CblasNonUnit) {
225
229
  DType temp= 1 / a[k + k * lda];
226
- for (int i = 0; i < m; ++i) {
230
+ for (int i = 1; i <= m; ++i) {
227
231
  b[i + k * ldb] = temp * b[i + k * ldb];
228
232
  }
229
233
  }
230
- for (int j = 0; j < k-1; ++j) {
234
+ for (int j = 1; j <= k-1; ++j) {
231
235
  if (a[j + k * lda] != 0.) {
232
236
  DType temp= a[j + k * lda];
233
- for (int i = 0; i < m; ++i) {
237
+ for (int i = 1; i <= m; ++i) {
234
238
  b[i + j * ldb] -= temp * b[i + k * ldb];
235
239
  }
236
240
  }
237
241
  }
238
242
  if (alpha != 1) {
239
- for (int i = 0; i < m; ++i) {
243
+ for (int i = 1; i <= m; ++i) {
240
244
  b[i + k * ldb] = alpha * b[i + k * ldb];
241
245
  }
242
246
  }
243
247
  }
244
248
  } else {
245
- for (int k = 0; k < n; ++k) {
249
+ for (int k = 1; k <= n; ++k) {
246
250
  if (diag == CblasNonUnit) {
247
251
  DType temp = 1 / a[k + k * lda];
248
- for (int i = 0; i < m; ++i) {
252
+ for (int i = 1; i <= m; ++i) {
249
253
  b[i + k * ldb] = temp * b[i + k * ldb];
250
254
  }
251
255
  }
252
- for (int j = k+1; j < n; ++j) {
256
+ for (int j = k+1; j <= n; ++j) {
253
257
  if (a[j + k * lda] != 0.) {
254
258
  DType temp = a[j + k * lda];
255
- for (int i = 0; i < m; ++i) {
259
+ for (int i = 1; i <= m; ++i) {
256
260
  b[i + j * ldb] -= temp * b[i + k * ldb];
257
261
  }
258
262
  }
259
263
  }
260
264
  if (alpha != 1) {
261
- for (int i = 0; i < m; ++i) {
265
+ for (int i = 1; i <= m; ++i) {
262
266
  b[i + k * ldb] = alpha * b[i + k * ldb];
263
267
  }
264
268
  }