nmatrix-lapacke 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -72,6 +72,7 @@
72
72
 
73
73
  #include <algorithm> // std::min, std::max
74
74
  #include <limits> // std::numeric_limits
75
+ #include <memory> // std::unique_ptr
75
76
 
76
77
  /*
77
78
  * Project Includes
@@ -123,8 +124,8 @@ template <typename DType>
123
124
  inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
124
125
  const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
125
126
  const unsigned int max_lmn = std::max(std::max(m, n), l);
126
- IType next[max_lmn];
127
- DType sums[max_lmn];
127
+ std::unique_ptr<IType[]> next(new IType[max_lmn]);
128
+ std::unique_ptr<DType[]> sums(new DType[max_lmn]);
128
129
 
129
130
  DType v;
130
131
 
@@ -88,183 +88,183 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
88
88
  b[i + j * ldb] = 0;
89
89
  }
90
90
  }
91
- return;
91
+ return;
92
92
  }
93
93
 
94
94
  if (side == CblasLeft) {
95
- if (trans_a == CblasNoTrans) {
95
+ if (trans_a == CblasNoTrans) {
96
96
 
97
97
  /* Form B := alpha*inv( A )*B. */
98
- if (uplo == CblasUpper) {
99
- for (int j = 0; j < n; ++j) {
100
- if (alpha != 1) {
101
- for (int i = 0; i < m; ++i) {
102
- b[i + j * ldb] = alpha * b[i + j * ldb];
103
- }
104
- }
105
- for (int k = m-1; k >= 0; --k) {
106
- if (b[k + j * ldb] != 0) {
107
- if (diag == CblasNonUnit) {
108
- b[k + j * ldb] /= a[k + k * lda];
109
- }
98
+ if (uplo == CblasUpper) {
99
+ for (int j = 0; j < n; ++j) {
100
+ if (alpha != 1) {
101
+ for (int i = 0; i < m; ++i) {
102
+ b[i + j * ldb] = alpha * b[i + j * ldb];
103
+ }
104
+ }
105
+ for (int k = m-1; k >= 0; --k) {
106
+ if (b[k + j * ldb] != 0) {
107
+ if (diag == CblasNonUnit) {
108
+ b[k + j * ldb] /= a[k + k * lda];
109
+ }
110
110
 
111
111
  for (int i = 0; i < k-1; ++i) {
112
112
  b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
113
113
  }
114
- }
115
- }
116
- }
117
- } else {
118
- for (int j = 0; j < n; ++j) {
119
- if (alpha != 1) {
114
+ }
115
+ }
116
+ }
117
+ } else {
118
+ for (int j = 0; j < n; ++j) {
119
+ if (alpha != 1) {
120
120
  for (int i = 0; i < m; ++i) {
121
121
  b[i + j * ldb] = alpha * b[i + j * ldb];
122
- }
123
- }
124
- for (int k = 0; k < m; ++k) {
125
- if (b[k + j * ldb] != 0.) {
126
- if (diag == CblasNonUnit) {
127
- b[k + j * ldb] /= a[k + k * lda];
128
- }
129
- for (int i = k+1; i < m; ++i) {
130
- b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
131
- }
132
- }
133
- }
134
- }
135
- }
136
- } else { // CblasTrans
122
+ }
123
+ }
124
+ for (int k = 0; k < m; ++k) {
125
+ if (b[k + j * ldb] != 0.) {
126
+ if (diag == CblasNonUnit) {
127
+ b[k + j * ldb] /= a[k + k * lda];
128
+ }
129
+ for (int i = k+1; i < m; ++i) {
130
+ b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
131
+ }
132
+ }
133
+ }
134
+ }
135
+ }
136
+ } else { // CblasTrans
137
137
 
138
138
  /* Form B := alpha*inv( A**T )*B. */
139
- if (uplo == CblasUpper) {
140
- for (int j = 0; j < n; ++j) {
141
- for (int i = 0; i < m; ++i) {
142
- DType temp = alpha * b[i + j * ldb];
139
+ if (uplo == CblasUpper) {
140
+ for (int j = 0; j < n; ++j) {
141
+ for (int i = 0; i < m; ++i) {
142
+ DType temp = alpha * b[i + j * ldb];
143
143
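  for (int k = 0; k < i; ++k) { // Bound was previously i-1 (off by one). NOTE(review): the bounds `i < k-1`, `k < j-1` and `j < k-1` in the other CblasUpper branches of this function look like the same off-by-one; confirm against reference BLAS xTRSM.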
144
144
  temp -= a[k + i * lda] * b[k + j * ldb];
145
- }
146
- if (diag == CblasNonUnit) {
147
- temp /= a[i + i * lda];
148
- }
149
- b[i + j * ldb] = temp;
150
- }
151
- }
152
- } else {
153
- for (int j = 0; j < n; ++j) {
154
- for (int i = m-1; i >= 0; --i) {
155
- DType temp= alpha * b[i + j * ldb];
156
- for (int k = i+1; k < m; ++k) {
157
- temp -= a[k + i * lda] * b[k + j * ldb];
158
- }
159
- if (diag == CblasNonUnit) {
160
- temp /= a[i + i * lda];
161
- }
162
- b[i + j * ldb] = temp;
163
- }
164
- }
165
- }
166
- }
145
+ }
146
+ if (diag == CblasNonUnit) {
147
+ temp /= a[i + i * lda];
148
+ }
149
+ b[i + j * ldb] = temp;
150
+ }
151
+ }
152
+ } else {
153
+ for (int j = 0; j < n; ++j) {
154
+ for (int i = m-1; i >= 0; --i) {
155
+ DType temp= alpha * b[i + j * ldb];
156
+ for (int k = i+1; k < m; ++k) {
157
+ temp -= a[k + i * lda] * b[k + j * ldb];
158
+ }
159
+ if (diag == CblasNonUnit) {
160
+ temp /= a[i + i * lda];
161
+ }
162
+ b[i + j * ldb] = temp;
163
+ }
164
+ }
165
+ }
166
+ }
167
167
  } else { // right side
168
168
 
169
- if (trans_a == CblasNoTrans) {
169
+ if (trans_a == CblasNoTrans) {
170
170
 
171
171
  /* Form B := alpha*B*inv( A ). */
172
172
 
173
- if (uplo == CblasUpper) {
174
- for (int j = 0; j < n; ++j) {
175
- if (alpha != 1) {
176
- for (int i = 0; i < m; ++i) {
177
- b[i + j * ldb] = alpha * b[i + j * ldb];
178
- }
179
- }
180
- for (int k = 0; k < j-1; ++k) {
181
- if (a[k + j * lda] != 0) {
182
- for (int i = 0; i < m; ++i) {
183
- b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
184
- }
185
- }
186
- }
187
- if (diag == CblasNonUnit) {
188
- DType temp = 1 / a[j + j * lda];
189
- for (int i = 0; i < m; ++i) {
190
- b[i + j * ldb] = temp * b[i + j * ldb];
191
- }
192
- }
193
- }
194
- } else {
195
- for (int j = n-1; j >= 0; --j) {
196
- if (alpha != 1) {
197
- for (int i = 0; i < m; ++i) {
198
- b[i + j * ldb] = alpha * b[i + j * ldb];
199
- }
200
- }
173
+ if (uplo == CblasUpper) {
174
+ for (int j = 0; j < n; ++j) {
175
+ if (alpha != 1) {
176
+ for (int i = 0; i < m; ++i) {
177
+ b[i + j * ldb] = alpha * b[i + j * ldb];
178
+ }
179
+ }
180
+ for (int k = 0; k < j-1; ++k) {
181
+ if (a[k + j * lda] != 0) {
182
+ for (int i = 0; i < m; ++i) {
183
+ b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
184
+ }
185
+ }
186
+ }
187
+ if (diag == CblasNonUnit) {
188
+ DType temp = 1 / a[j + j * lda];
189
+ for (int i = 0; i < m; ++i) {
190
+ b[i + j * ldb] = temp * b[i + j * ldb];
191
+ }
192
+ }
193
+ }
194
+ } else {
195
+ for (int j = n-1; j >= 0; --j) {
196
+ if (alpha != 1) {
197
+ for (int i = 0; i < m; ++i) {
198
+ b[i + j * ldb] = alpha * b[i + j * ldb];
199
+ }
200
+ }
201
201
 
202
- for (int k = j+1; k < n; ++k) {
203
- if (a[k + j * lda] != 0.) {
204
- for (int i = 0; i < m; ++i) {
205
- b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
206
- }
207
- }
208
- }
209
- if (diag == CblasNonUnit) {
210
- DType temp = 1 / a[j + j * lda];
202
+ for (int k = j+1; k < n; ++k) {
203
+ if (a[k + j * lda] != 0.) {
204
+ for (int i = 0; i < m; ++i) {
205
+ b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
206
+ }
207
+ }
208
+ }
209
+ if (diag == CblasNonUnit) {
210
+ DType temp = 1 / a[j + j * lda];
211
211
 
212
- for (int i = 0; i < m; ++i) {
213
- b[i + j * ldb] = temp * b[i + j * ldb];
214
- }
215
- }
216
- }
217
- }
218
- } else { // CblasTrans
212
+ for (int i = 0; i < m; ++i) {
213
+ b[i + j * ldb] = temp * b[i + j * ldb];
214
+ }
215
+ }
216
+ }
217
+ }
218
+ } else { // CblasTrans
219
219
 
220
220
  /* Form B := alpha*B*inv( A**T ). */
221
221
 
222
- if (uplo == CblasUpper) {
223
- for (int k = n-1; k >= 0; --k) {
224
- if (diag == CblasNonUnit) {
225
- DType temp= 1 / a[k + k * lda];
226
- for (int i = 0; i < m; ++i) {
227
- b[i + k * ldb] = temp * b[i + k * ldb];
228
- }
229
- }
230
- for (int j = 0; j < k-1; ++j) {
231
- if (a[j + k * lda] != 0.) {
232
- DType temp= a[j + k * lda];
233
- for (int i = 0; i < m; ++i) {
234
- b[i + j * ldb] -= temp * b[i + k * ldb];
235
- }
236
- }
237
- }
238
- if (alpha != 1) {
239
- for (int i = 0; i < m; ++i) {
240
- b[i + k * ldb] = alpha * b[i + k * ldb];
241
- }
242
- }
243
- }
244
- } else {
245
- for (int k = 0; k < n; ++k) {
246
- if (diag == CblasNonUnit) {
247
- DType temp = 1 / a[k + k * lda];
248
- for (int i = 0; i < m; ++i) {
249
- b[i + k * ldb] = temp * b[i + k * ldb];
250
- }
251
- }
252
- for (int j = k+1; j < n; ++j) {
253
- if (a[j + k * lda] != 0.) {
254
- DType temp = a[j + k * lda];
255
- for (int i = 0; i < m; ++i) {
256
- b[i + j * ldb] -= temp * b[i + k * ldb];
257
- }
258
- }
259
- }
260
- if (alpha != 1) {
261
- for (int i = 0; i < m; ++i) {
262
- b[i + k * ldb] = alpha * b[i + k * ldb];
263
- }
264
- }
265
- }
266
- }
267
- }
222
+ if (uplo == CblasUpper) {
223
+ for (int k = n-1; k >= 0; --k) {
224
+ if (diag == CblasNonUnit) {
225
+ DType temp= 1 / a[k + k * lda];
226
+ for (int i = 0; i < m; ++i) {
227
+ b[i + k * ldb] = temp * b[i + k * ldb];
228
+ }
229
+ }
230
+ for (int j = 0; j < k-1; ++j) {
231
+ if (a[j + k * lda] != 0.) {
232
+ DType temp= a[j + k * lda];
233
+ for (int i = 0; i < m; ++i) {
234
+ b[i + j * ldb] -= temp * b[i + k * ldb];
235
+ }
236
+ }
237
+ }
238
+ if (alpha != 1) {
239
+ for (int i = 0; i < m; ++i) {
240
+ b[i + k * ldb] = alpha * b[i + k * ldb];
241
+ }
242
+ }
243
+ }
244
+ } else {
245
+ for (int k = 0; k < n; ++k) {
246
+ if (diag == CblasNonUnit) {
247
+ DType temp = 1 / a[k + k * lda];
248
+ for (int i = 0; i < m; ++i) {
249
+ b[i + k * ldb] = temp * b[i + k * ldb];
250
+ }
251
+ }
252
+ for (int j = k+1; j < n; ++j) {
253
+ if (a[j + k * lda] != 0.) {
254
+ DType temp = a[j + k * lda];
255
+ for (int i = 0; i < m; ++i) {
256
+ b[i + j * ldb] -= temp * b[i + k * ldb];
257
+ }
258
+ }
259
+ }
260
+ if (alpha != 1) {
261
+ for (int i = 0; i < m; ++i) {
262
+ b[i + k * ldb] = alpha * b[i + k * ldb];
263
+ }
264
+ }
265
+ }
266
+ }
267
+ }
268
268
  }
269
269
  }
270
270
 
@@ -57,6 +57,33 @@
57
57
  #include "nm_memory.h"
58
58
  #endif
59
59
 
60
// Compatibility shims. Every definition below is wrapped in its own #ifndef,
// so it only takes effect when that name is not already defined.
//
// FIX_CONST_VALUE_PTR(x): when any of the Fujitsu compiler macros is defined,
// casts x to `const VALUE *`; otherwise expands to x unchanged.
+ #ifndef FIX_CONST_VALUE_PTR
61
+ # if defined(__fcc__) || defined(__fcc_version) || \
62
+ defined(__FCC__) || defined(__FCC_VERSION)
63
+ /* workaround for old version of Fujitsu C Compiler (fcc) */
64
+ # define FIX_CONST_VALUE_PTR(x) ((const VALUE *)(x))
65
+ # else
66
+ # define FIX_CONST_VALUE_PTR(x) (x)
67
+ # endif
68
+ #endif
69
+
70
// Fallback rb_array_const_ptr(), compiled only when HAVE_RB_ARRAY_CONST_PTR
// is not defined. Returns a read-only pointer to the array's elements: the
// as.ary member when RARRAY_EMBED_FLAG is set in the object's flags,
// otherwise the as.heap.ptr member.
+ #ifndef HAVE_RB_ARRAY_CONST_PTR
71
+ static inline const VALUE *
72
+ rb_array_const_ptr(VALUE a)
73
+ {
74
+ return FIX_CONST_VALUE_PTR((RBASIC(a)->flags & RARRAY_EMBED_FLAG) ?
75
+ RARRAY(a)->as.ary : RARRAY(a)->as.heap.ptr);
76
+ }
77
+ #endif
78
+
79
// RARRAY_CONST_PTR(a): fallback that forwards to rb_array_const_ptr(a).
+ #ifndef RARRAY_CONST_PTR
80
+ # define RARRAY_CONST_PTR(a) rb_array_const_ptr(a)
81
+ #endif
82
+
83
// RARRAY_AREF(a, i): fallback that indexes element i through RARRAY_CONST_PTR(a).
// No bounds check is performed by this macro.
+ #ifndef RARRAY_AREF
84
+ # define RARRAY_AREF(a, i) (RARRAY_CONST_PTR(a)[i])
85
+ #endif
86
+
60
87
  /*
61
88
  * Macros
62
89
  */
@@ -323,7 +350,10 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER); // };
323
350
  // Entry at index 1 of the shape array reached through NM_STORAGE(val).
  #define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
324
351
  // The default_val member reached through NM_STORAGE_LIST(val).
  #define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
325
352
 
353
+ // Number of elements in a dense nmatrix.
326
354
  #define NM_DENSE_COUNT(val) (nm_storage_count_max_elements(NM_STORAGE_DENSE(val)))
355
+
356
+ // Get a pointer to the array that stores elements in a dense matrix.
327
357
  #define NM_DENSE_ELEMENTS(val) (NM_STORAGE_DENSE(val)->elements)
328
358
  // Looks up DTYPE_SIZES at index NM_DTYPE(val); presumably the element size in bytes (TODO confirm).
  #define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
329
359
  // Calls the RefFuncs entry selected by NM_STYPE(val), passing the storage, the slice, and the dtype size.
  #define NM_REF(val,slice) (RefFuncs[NM_STYPE(val)]( NM_STORAGE(val), slice, NM_SIZEOF_DTYPE(val) ))
@@ -45,57 +45,57 @@ extern ID nm_rb_dtype,
45
45
  nm_rb_default,
46
46
 
47
47
  nm_rb_real,
48
- nm_rb_imag,
48
+ nm_rb_imag,
49
49
 
50
- nm_rb_numer,
51
- nm_rb_denom,
50
+ nm_rb_numer,
51
+ nm_rb_denom,
52
52
 
53
- nm_rb_complex_conjugate,
54
- nm_rb_transpose,
55
- nm_rb_no_transpose,
56
- nm_rb_left,
57
- nm_rb_right,
58
- nm_rb_upper,
59
- nm_rb_lower,
60
- nm_rb_unit,
61
- nm_rb_nonunit,
53
+ nm_rb_complex_conjugate,
54
+ nm_rb_transpose,
55
+ nm_rb_no_transpose,
56
+ nm_rb_left,
57
+ nm_rb_right,
58
+ nm_rb_upper,
59
+ nm_rb_lower,
60
+ nm_rb_unit,
61
+ nm_rb_nonunit,
62
62
 
63
- nm_rb_dense,
64
- nm_rb_list,
65
- nm_rb_yale,
63
+ nm_rb_dense,
64
+ nm_rb_list,
65
+ nm_rb_yale,
66
66
 
67
67
  nm_rb_row,
68
68
  nm_rb_column,
69
69
 
70
- nm_rb_add,
71
- nm_rb_sub,
72
- nm_rb_mul,
73
- nm_rb_div,
70
+ nm_rb_add,
71
+ nm_rb_sub,
72
+ nm_rb_mul,
73
+ nm_rb_div,
74
74
 
75
- nm_rb_negate,
75
+ nm_rb_negate,
76
76
 
77
- nm_rb_percent,
78
- nm_rb_gt,
79
- nm_rb_lt,
80
- nm_rb_eql,
81
- nm_rb_neql,
82
- nm_rb_gte,
83
- nm_rb_lte,
77
+ nm_rb_percent,
78
+ nm_rb_gt,
79
+ nm_rb_lt,
80
+ nm_rb_eql,
81
+ nm_rb_neql,
82
+ nm_rb_gte,
83
+ nm_rb_lte,
84
84
 
85
- nm_rb_hash;
85
+ nm_rb_hash;
86
86
 
87
- extern VALUE cNMatrix,
87
+ extern VALUE cNMatrix,
88
88
  cNMatrix_IO,
89
89
  cNMatrix_IO_Matlab,
90
- cNMatrix_YaleFunctions,
90
+ cNMatrix_YaleFunctions,
91
91
 
92
- cNMatrix_GC_holder,
92
+ cNMatrix_GC_holder,
93
93
 
94
- nm_eDataTypeError,
94
+ nm_eDataTypeError,
95
95
  nm_eConvergenceError,
96
- nm_eStorageTypeError,
97
- nm_eShapeError,
98
- nm_eNotInvertibleError;
96
+ nm_eStorageTypeError,
97
+ nm_eShapeError,
98
+ nm_eNotInvertibleError;
99
99
 
100
100
  /*
101
101
  * Functions