nmatrix-lapacke 0.2.0 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -72,6 +72,7 @@
72
72
 
73
73
  #include <algorithm> // std::min, std::max
74
74
  #include <limits> // std::numeric_limits
75
+ #include <memory> // std::unique_ptr
75
76
 
76
77
  /*
77
78
  * Project Includes
@@ -123,8 +124,8 @@ template <typename DType>
123
124
  inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
124
125
  const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
125
126
  const unsigned int max_lmn = std::max(std::max(m, n), l);
126
- IType next[max_lmn];
127
- DType sums[max_lmn];
127
+ std::unique_ptr<IType[]> next(new IType[max_lmn]);
128
+ std::unique_ptr<DType[]> sums(new DType[max_lmn]);
128
129
 
129
130
  DType v;
130
131
 
@@ -88,183 +88,183 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
88
88
  b[i + j * ldb] = 0;
89
89
  }
90
90
  }
91
- return;
91
+ return;
92
92
  }
93
93
 
94
94
  if (side == CblasLeft) {
95
- if (trans_a == CblasNoTrans) {
95
+ if (trans_a == CblasNoTrans) {
96
96
 
97
97
  /* Form B := alpha*inv( A )*B. */
98
- if (uplo == CblasUpper) {
99
- for (int j = 0; j < n; ++j) {
100
- if (alpha != 1) {
101
- for (int i = 0; i < m; ++i) {
102
- b[i + j * ldb] = alpha * b[i + j * ldb];
103
- }
104
- }
105
- for (int k = m-1; k >= 0; --k) {
106
- if (b[k + j * ldb] != 0) {
107
- if (diag == CblasNonUnit) {
108
- b[k + j * ldb] /= a[k + k * lda];
109
- }
98
+ if (uplo == CblasUpper) {
99
+ for (int j = 0; j < n; ++j) {
100
+ if (alpha != 1) {
101
+ for (int i = 0; i < m; ++i) {
102
+ b[i + j * ldb] = alpha * b[i + j * ldb];
103
+ }
104
+ }
105
+ for (int k = m-1; k >= 0; --k) {
106
+ if (b[k + j * ldb] != 0) {
107
+ if (diag == CblasNonUnit) {
108
+ b[k + j * ldb] /= a[k + k * lda];
109
+ }
110
110
 
111
111
  for (int i = 0; i < k-1; ++i) {
112
112
  b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
113
113
  }
114
- }
115
- }
116
- }
117
- } else {
118
- for (int j = 0; j < n; ++j) {
119
- if (alpha != 1) {
114
+ }
115
+ }
116
+ }
117
+ } else {
118
+ for (int j = 0; j < n; ++j) {
119
+ if (alpha != 1) {
120
120
  for (int i = 0; i < m; ++i) {
121
121
  b[i + j * ldb] = alpha * b[i + j * ldb];
122
- }
123
- }
124
- for (int k = 0; k < m; ++k) {
125
- if (b[k + j * ldb] != 0.) {
126
- if (diag == CblasNonUnit) {
127
- b[k + j * ldb] /= a[k + k * lda];
128
- }
129
- for (int i = k+1; i < m; ++i) {
130
- b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
131
- }
132
- }
133
- }
134
- }
135
- }
136
- } else { // CblasTrans
122
+ }
123
+ }
124
+ for (int k = 0; k < m; ++k) {
125
+ if (b[k + j * ldb] != 0.) {
126
+ if (diag == CblasNonUnit) {
127
+ b[k + j * ldb] /= a[k + k * lda];
128
+ }
129
+ for (int i = k+1; i < m; ++i) {
130
+ b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
131
+ }
132
+ }
133
+ }
134
+ }
135
+ }
136
+ } else { // CblasTrans
137
137
 
138
138
  /* Form B := alpha*inv( A**T )*B. */
139
- if (uplo == CblasUpper) {
140
- for (int j = 0; j < n; ++j) {
141
- for (int i = 0; i < m; ++i) {
142
- DType temp = alpha * b[i + j * ldb];
139
+ if (uplo == CblasUpper) {
140
+ for (int j = 0; j < n; ++j) {
141
+ for (int i = 0; i < m; ++i) {
142
+ DType temp = alpha * b[i + j * ldb];
143
143
  for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
144
144
  temp -= a[k + i * lda] * b[k + j * ldb];
145
- }
146
- if (diag == CblasNonUnit) {
147
- temp /= a[i + i * lda];
148
- }
149
- b[i + j * ldb] = temp;
150
- }
151
- }
152
- } else {
153
- for (int j = 0; j < n; ++j) {
154
- for (int i = m-1; i >= 0; --i) {
155
- DType temp= alpha * b[i + j * ldb];
156
- for (int k = i+1; k < m; ++k) {
157
- temp -= a[k + i * lda] * b[k + j * ldb];
158
- }
159
- if (diag == CblasNonUnit) {
160
- temp /= a[i + i * lda];
161
- }
162
- b[i + j * ldb] = temp;
163
- }
164
- }
165
- }
166
- }
145
+ }
146
+ if (diag == CblasNonUnit) {
147
+ temp /= a[i + i * lda];
148
+ }
149
+ b[i + j * ldb] = temp;
150
+ }
151
+ }
152
+ } else {
153
+ for (int j = 0; j < n; ++j) {
154
+ for (int i = m-1; i >= 0; --i) {
155
+ DType temp= alpha * b[i + j * ldb];
156
+ for (int k = i+1; k < m; ++k) {
157
+ temp -= a[k + i * lda] * b[k + j * ldb];
158
+ }
159
+ if (diag == CblasNonUnit) {
160
+ temp /= a[i + i * lda];
161
+ }
162
+ b[i + j * ldb] = temp;
163
+ }
164
+ }
165
+ }
166
+ }
167
167
  } else { // right side
168
168
 
169
- if (trans_a == CblasNoTrans) {
169
+ if (trans_a == CblasNoTrans) {
170
170
 
171
171
  /* Form B := alpha*B*inv( A ). */
172
172
 
173
- if (uplo == CblasUpper) {
174
- for (int j = 0; j < n; ++j) {
175
- if (alpha != 1) {
176
- for (int i = 0; i < m; ++i) {
177
- b[i + j * ldb] = alpha * b[i + j * ldb];
178
- }
179
- }
180
- for (int k = 0; k < j-1; ++k) {
181
- if (a[k + j * lda] != 0) {
182
- for (int i = 0; i < m; ++i) {
183
- b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
184
- }
185
- }
186
- }
187
- if (diag == CblasNonUnit) {
188
- DType temp = 1 / a[j + j * lda];
189
- for (int i = 0; i < m; ++i) {
190
- b[i + j * ldb] = temp * b[i + j * ldb];
191
- }
192
- }
193
- }
194
- } else {
195
- for (int j = n-1; j >= 0; --j) {
196
- if (alpha != 1) {
197
- for (int i = 0; i < m; ++i) {
198
- b[i + j * ldb] = alpha * b[i + j * ldb];
199
- }
200
- }
173
+ if (uplo == CblasUpper) {
174
+ for (int j = 0; j < n; ++j) {
175
+ if (alpha != 1) {
176
+ for (int i = 0; i < m; ++i) {
177
+ b[i + j * ldb] = alpha * b[i + j * ldb];
178
+ }
179
+ }
180
+ for (int k = 0; k < j-1; ++k) {
181
+ if (a[k + j * lda] != 0) {
182
+ for (int i = 0; i < m; ++i) {
183
+ b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
184
+ }
185
+ }
186
+ }
187
+ if (diag == CblasNonUnit) {
188
+ DType temp = 1 / a[j + j * lda];
189
+ for (int i = 0; i < m; ++i) {
190
+ b[i + j * ldb] = temp * b[i + j * ldb];
191
+ }
192
+ }
193
+ }
194
+ } else {
195
+ for (int j = n-1; j >= 0; --j) {
196
+ if (alpha != 1) {
197
+ for (int i = 0; i < m; ++i) {
198
+ b[i + j * ldb] = alpha * b[i + j * ldb];
199
+ }
200
+ }
201
201
 
202
- for (int k = j+1; k < n; ++k) {
203
- if (a[k + j * lda] != 0.) {
204
- for (int i = 0; i < m; ++i) {
205
- b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
206
- }
207
- }
208
- }
209
- if (diag == CblasNonUnit) {
210
- DType temp = 1 / a[j + j * lda];
202
+ for (int k = j+1; k < n; ++k) {
203
+ if (a[k + j * lda] != 0.) {
204
+ for (int i = 0; i < m; ++i) {
205
+ b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
206
+ }
207
+ }
208
+ }
209
+ if (diag == CblasNonUnit) {
210
+ DType temp = 1 / a[j + j * lda];
211
211
 
212
- for (int i = 0; i < m; ++i) {
213
- b[i + j * ldb] = temp * b[i + j * ldb];
214
- }
215
- }
216
- }
217
- }
218
- } else { // CblasTrans
212
+ for (int i = 0; i < m; ++i) {
213
+ b[i + j * ldb] = temp * b[i + j * ldb];
214
+ }
215
+ }
216
+ }
217
+ }
218
+ } else { // CblasTrans
219
219
 
220
220
  /* Form B := alpha*B*inv( A**T ). */
221
221
 
222
- if (uplo == CblasUpper) {
223
- for (int k = n-1; k >= 0; --k) {
224
- if (diag == CblasNonUnit) {
225
- DType temp= 1 / a[k + k * lda];
226
- for (int i = 0; i < m; ++i) {
227
- b[i + k * ldb] = temp * b[i + k * ldb];
228
- }
229
- }
230
- for (int j = 0; j < k-1; ++j) {
231
- if (a[j + k * lda] != 0.) {
232
- DType temp= a[j + k * lda];
233
- for (int i = 0; i < m; ++i) {
234
- b[i + j * ldb] -= temp * b[i + k * ldb];
235
- }
236
- }
237
- }
238
- if (alpha != 1) {
239
- for (int i = 0; i < m; ++i) {
240
- b[i + k * ldb] = alpha * b[i + k * ldb];
241
- }
242
- }
243
- }
244
- } else {
245
- for (int k = 0; k < n; ++k) {
246
- if (diag == CblasNonUnit) {
247
- DType temp = 1 / a[k + k * lda];
248
- for (int i = 0; i < m; ++i) {
249
- b[i + k * ldb] = temp * b[i + k * ldb];
250
- }
251
- }
252
- for (int j = k+1; j < n; ++j) {
253
- if (a[j + k * lda] != 0.) {
254
- DType temp = a[j + k * lda];
255
- for (int i = 0; i < m; ++i) {
256
- b[i + j * ldb] -= temp * b[i + k * ldb];
257
- }
258
- }
259
- }
260
- if (alpha != 1) {
261
- for (int i = 0; i < m; ++i) {
262
- b[i + k * ldb] = alpha * b[i + k * ldb];
263
- }
264
- }
265
- }
266
- }
267
- }
222
+ if (uplo == CblasUpper) {
223
+ for (int k = n-1; k >= 0; --k) {
224
+ if (diag == CblasNonUnit) {
225
+ DType temp= 1 / a[k + k * lda];
226
+ for (int i = 0; i < m; ++i) {
227
+ b[i + k * ldb] = temp * b[i + k * ldb];
228
+ }
229
+ }
230
+ for (int j = 0; j < k-1; ++j) {
231
+ if (a[j + k * lda] != 0.) {
232
+ DType temp= a[j + k * lda];
233
+ for (int i = 0; i < m; ++i) {
234
+ b[i + j * ldb] -= temp * b[i + k * ldb];
235
+ }
236
+ }
237
+ }
238
+ if (alpha != 1) {
239
+ for (int i = 0; i < m; ++i) {
240
+ b[i + k * ldb] = alpha * b[i + k * ldb];
241
+ }
242
+ }
243
+ }
244
+ } else {
245
+ for (int k = 0; k < n; ++k) {
246
+ if (diag == CblasNonUnit) {
247
+ DType temp = 1 / a[k + k * lda];
248
+ for (int i = 0; i < m; ++i) {
249
+ b[i + k * ldb] = temp * b[i + k * ldb];
250
+ }
251
+ }
252
+ for (int j = k+1; j < n; ++j) {
253
+ if (a[j + k * lda] != 0.) {
254
+ DType temp = a[j + k * lda];
255
+ for (int i = 0; i < m; ++i) {
256
+ b[i + j * ldb] -= temp * b[i + k * ldb];
257
+ }
258
+ }
259
+ }
260
+ if (alpha != 1) {
261
+ for (int i = 0; i < m; ++i) {
262
+ b[i + k * ldb] = alpha * b[i + k * ldb];
263
+ }
264
+ }
265
+ }
266
+ }
267
+ }
268
268
  }
269
269
  }
270
270
 
@@ -57,6 +57,33 @@
57
57
  #include "nm_memory.h"
58
58
  #endif
59
59
 
60
+ #ifndef FIX_CONST_VALUE_PTR
61
+ # if defined(__fcc__) || defined(__fcc_version) || \
62
+ defined(__FCC__) || defined(__FCC_VERSION)
63
+ /* workaround for old version of Fujitsu C Compiler (fcc) */
64
+ # define FIX_CONST_VALUE_PTR(x) ((const VALUE *)(x))
65
+ # else
66
+ # define FIX_CONST_VALUE_PTR(x) (x)
67
+ # endif
68
+ #endif
69
+
70
+ #ifndef HAVE_RB_ARRAY_CONST_PTR
71
+ static inline const VALUE *
72
+ rb_array_const_ptr(VALUE a)
73
+ {
74
+ return FIX_CONST_VALUE_PTR((RBASIC(a)->flags & RARRAY_EMBED_FLAG) ?
75
+ RARRAY(a)->as.ary : RARRAY(a)->as.heap.ptr);
76
+ }
77
+ #endif
78
+
79
+ #ifndef RARRAY_CONST_PTR
80
+ # define RARRAY_CONST_PTR(a) rb_array_const_ptr(a)
81
+ #endif
82
+
83
+ #ifndef RARRAY_AREF
84
+ # define RARRAY_AREF(a, i) (RARRAY_CONST_PTR(a)[i])
85
+ #endif
86
+
60
87
  /*
61
88
  * Macros
62
89
  */
@@ -323,7 +350,10 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER); // };
323
350
  #define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
324
351
  #define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
325
352
 
353
+ // Number of elements in a dense nmatrix.
326
354
  #define NM_DENSE_COUNT(val) (nm_storage_count_max_elements(NM_STORAGE_DENSE(val)))
355
+
356
+ // Get a pointer to the array that stores elements in a dense matrix.
327
357
  #define NM_DENSE_ELEMENTS(val) (NM_STORAGE_DENSE(val)->elements)
328
358
  #define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
329
359
  #define NM_REF(val,slice) (RefFuncs[NM_STYPE(val)]( NM_STORAGE(val), slice, NM_SIZEOF_DTYPE(val) ))
@@ -45,57 +45,57 @@ extern ID nm_rb_dtype,
45
45
  nm_rb_default,
46
46
 
47
47
  nm_rb_real,
48
- nm_rb_imag,
48
+ nm_rb_imag,
49
49
 
50
- nm_rb_numer,
51
- nm_rb_denom,
50
+ nm_rb_numer,
51
+ nm_rb_denom,
52
52
 
53
- nm_rb_complex_conjugate,
54
- nm_rb_transpose,
55
- nm_rb_no_transpose,
56
- nm_rb_left,
57
- nm_rb_right,
58
- nm_rb_upper,
59
- nm_rb_lower,
60
- nm_rb_unit,
61
- nm_rb_nonunit,
53
+ nm_rb_complex_conjugate,
54
+ nm_rb_transpose,
55
+ nm_rb_no_transpose,
56
+ nm_rb_left,
57
+ nm_rb_right,
58
+ nm_rb_upper,
59
+ nm_rb_lower,
60
+ nm_rb_unit,
61
+ nm_rb_nonunit,
62
62
 
63
- nm_rb_dense,
64
- nm_rb_list,
65
- nm_rb_yale,
63
+ nm_rb_dense,
64
+ nm_rb_list,
65
+ nm_rb_yale,
66
66
 
67
67
  nm_rb_row,
68
68
  nm_rb_column,
69
69
 
70
- nm_rb_add,
71
- nm_rb_sub,
72
- nm_rb_mul,
73
- nm_rb_div,
70
+ nm_rb_add,
71
+ nm_rb_sub,
72
+ nm_rb_mul,
73
+ nm_rb_div,
74
74
 
75
- nm_rb_negate,
75
+ nm_rb_negate,
76
76
 
77
- nm_rb_percent,
78
- nm_rb_gt,
79
- nm_rb_lt,
80
- nm_rb_eql,
81
- nm_rb_neql,
82
- nm_rb_gte,
83
- nm_rb_lte,
77
+ nm_rb_percent,
78
+ nm_rb_gt,
79
+ nm_rb_lt,
80
+ nm_rb_eql,
81
+ nm_rb_neql,
82
+ nm_rb_gte,
83
+ nm_rb_lte,
84
84
 
85
- nm_rb_hash;
85
+ nm_rb_hash;
86
86
 
87
- extern VALUE cNMatrix,
87
+ extern VALUE cNMatrix,
88
88
  cNMatrix_IO,
89
89
  cNMatrix_IO_Matlab,
90
- cNMatrix_YaleFunctions,
90
+ cNMatrix_YaleFunctions,
91
91
 
92
- cNMatrix_GC_holder,
92
+ cNMatrix_GC_holder,
93
93
 
94
- nm_eDataTypeError,
94
+ nm_eDataTypeError,
95
95
  nm_eConvergenceError,
96
- nm_eStorageTypeError,
97
- nm_eShapeError,
98
- nm_eNotInvertibleError;
96
+ nm_eStorageTypeError,
97
+ nm_eShapeError,
98
+ nm_eNotInvertibleError;
99
99
 
100
100
  /*
101
101
  * Functions