nmatrix-lapacke 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +183 -159
- data/ext/nmatrix/data/data.h +306 -292
- data/ext/nmatrix/data/ruby_object.h +193 -193
- data/ext/nmatrix/math/math.h +3 -2
- data/ext/nmatrix/math/trsm.h +152 -152
- data/ext/nmatrix/nmatrix.h +30 -0
- data/ext/nmatrix/ruby_constants.h +35 -35
- data/ext/nmatrix/storage/common.h +4 -3
- data/ext/nmatrix/storage/dense/dense.h +8 -7
- data/ext/nmatrix/storage/list/list.h +7 -6
- data/ext/nmatrix/storage/storage.h +12 -11
- data/ext/nmatrix/storage/yale/class.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/base.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/iterator.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +1 -0
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +2 -1
- data/ext/nmatrix/storage/yale/yale.h +7 -6
- data/ext/nmatrix/types.h +3 -2
- data/ext/nmatrix/util/sl_list.h +19 -18
- data/ext/nmatrix_lapacke/extconf.rb +15 -9
- data/ext/nmatrix_lapacke/math_lapacke.cpp +6 -6
- data/lib/nmatrix/lapacke.rb +31 -9
- data/spec/00_nmatrix_spec.rb +6 -0
- data/spec/math_spec.rb +77 -0
- data/spec/spec_helper.rb +9 -0
- metadata +4 -4
data/ext/nmatrix/math/math.h
CHANGED
@@ -72,6 +72,7 @@
|
|
72
72
|
|
73
73
|
#include <algorithm> // std::min, std::max
|
74
74
|
#include <limits> // std::numeric_limits
|
75
|
+
#include <memory> // std::unique_ptr
|
75
76
|
|
76
77
|
/*
|
77
78
|
* Project Includes
|
@@ -123,8 +124,8 @@ template <typename DType>
|
|
123
124
|
inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
|
124
125
|
const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
|
125
126
|
const unsigned int max_lmn = std::max(std::max(m, n), l);
|
126
|
-
IType next[max_lmn];
|
127
|
-
DType sums[max_lmn];
|
127
|
+
std::unique_ptr<IType[]> next(new IType[max_lmn]);
|
128
|
+
std::unique_ptr<DType[]> sums(new DType[max_lmn]);
|
128
129
|
|
129
130
|
DType v;
|
130
131
|
|
data/ext/nmatrix/math/trsm.h
CHANGED
@@ -88,183 +88,183 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
88
88
|
b[i + j * ldb] = 0;
|
89
89
|
}
|
90
90
|
}
|
91
|
-
|
91
|
+
return;
|
92
92
|
}
|
93
93
|
|
94
94
|
if (side == CblasLeft) {
|
95
|
-
|
95
|
+
if (trans_a == CblasNoTrans) {
|
96
96
|
|
97
97
|
/* Form B := alpha*inv( A )*B. */
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
98
|
+
if (uplo == CblasUpper) {
|
99
|
+
for (int j = 0; j < n; ++j) {
|
100
|
+
if (alpha != 1) {
|
101
|
+
for (int i = 0; i < m; ++i) {
|
102
|
+
b[i + j * ldb] = alpha * b[i + j * ldb];
|
103
|
+
}
|
104
|
+
}
|
105
|
+
for (int k = m-1; k >= 0; --k) {
|
106
|
+
if (b[k + j * ldb] != 0) {
|
107
|
+
if (diag == CblasNonUnit) {
|
108
|
+
b[k + j * ldb] /= a[k + k * lda];
|
109
|
+
}
|
110
110
|
|
111
111
|
for (int i = 0; i < k-1; ++i) {
|
112
112
|
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
113
113
|
}
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
117
|
+
} else {
|
118
|
+
for (int j = 0; j < n; ++j) {
|
119
|
+
if (alpha != 1) {
|
120
120
|
for (int i = 0; i < m; ++i) {
|
121
121
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
122
|
+
}
|
123
|
+
}
|
124
|
+
for (int k = 0; k < m; ++k) {
|
125
|
+
if (b[k + j * ldb] != 0.) {
|
126
|
+
if (diag == CblasNonUnit) {
|
127
|
+
b[k + j * ldb] /= a[k + k * lda];
|
128
|
+
}
|
129
|
+
for (int i = k+1; i < m; ++i) {
|
130
|
+
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
}
|
135
|
+
}
|
136
|
+
} else { // CblasTrans
|
137
137
|
|
138
138
|
/* Form B := alpha*inv( A**T )*B. */
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
139
|
+
if (uplo == CblasUpper) {
|
140
|
+
for (int j = 0; j < n; ++j) {
|
141
|
+
for (int i = 0; i < m; ++i) {
|
142
|
+
DType temp = alpha * b[i + j * ldb];
|
143
143
|
for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
|
144
144
|
temp -= a[k + i * lda] * b[k + j * ldb];
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
145
|
+
}
|
146
|
+
if (diag == CblasNonUnit) {
|
147
|
+
temp /= a[i + i * lda];
|
148
|
+
}
|
149
|
+
b[i + j * ldb] = temp;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
} else {
|
153
|
+
for (int j = 0; j < n; ++j) {
|
154
|
+
for (int i = m-1; i >= 0; --i) {
|
155
|
+
DType temp= alpha * b[i + j * ldb];
|
156
|
+
for (int k = i+1; k < m; ++k) {
|
157
|
+
temp -= a[k + i * lda] * b[k + j * ldb];
|
158
|
+
}
|
159
|
+
if (diag == CblasNonUnit) {
|
160
|
+
temp /= a[i + i * lda];
|
161
|
+
}
|
162
|
+
b[i + j * ldb] = temp;
|
163
|
+
}
|
164
|
+
}
|
165
|
+
}
|
166
|
+
}
|
167
167
|
} else { // right side
|
168
168
|
|
169
|
-
|
169
|
+
if (trans_a == CblasNoTrans) {
|
170
170
|
|
171
171
|
/* Form B := alpha*B*inv( A ). */
|
172
172
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
173
|
+
if (uplo == CblasUpper) {
|
174
|
+
for (int j = 0; j < n; ++j) {
|
175
|
+
if (alpha != 1) {
|
176
|
+
for (int i = 0; i < m; ++i) {
|
177
|
+
b[i + j * ldb] = alpha * b[i + j * ldb];
|
178
|
+
}
|
179
|
+
}
|
180
|
+
for (int k = 0; k < j-1; ++k) {
|
181
|
+
if (a[k + j * lda] != 0) {
|
182
|
+
for (int i = 0; i < m; ++i) {
|
183
|
+
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
184
|
+
}
|
185
|
+
}
|
186
|
+
}
|
187
|
+
if (diag == CblasNonUnit) {
|
188
|
+
DType temp = 1 / a[j + j * lda];
|
189
|
+
for (int i = 0; i < m; ++i) {
|
190
|
+
b[i + j * ldb] = temp * b[i + j * ldb];
|
191
|
+
}
|
192
|
+
}
|
193
|
+
}
|
194
|
+
} else {
|
195
|
+
for (int j = n-1; j >= 0; --j) {
|
196
|
+
if (alpha != 1) {
|
197
|
+
for (int i = 0; i < m; ++i) {
|
198
|
+
b[i + j * ldb] = alpha * b[i + j * ldb];
|
199
|
+
}
|
200
|
+
}
|
201
201
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
202
|
+
for (int k = j+1; k < n; ++k) {
|
203
|
+
if (a[k + j * lda] != 0.) {
|
204
|
+
for (int i = 0; i < m; ++i) {
|
205
|
+
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
206
|
+
}
|
207
|
+
}
|
208
|
+
}
|
209
|
+
if (diag == CblasNonUnit) {
|
210
|
+
DType temp = 1 / a[j + j * lda];
|
211
211
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
212
|
+
for (int i = 0; i < m; ++i) {
|
213
|
+
b[i + j * ldb] = temp * b[i + j * ldb];
|
214
|
+
}
|
215
|
+
}
|
216
|
+
}
|
217
|
+
}
|
218
|
+
} else { // CblasTrans
|
219
219
|
|
220
220
|
/* Form B := alpha*B*inv( A**T ). */
|
221
221
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
222
|
+
if (uplo == CblasUpper) {
|
223
|
+
for (int k = n-1; k >= 0; --k) {
|
224
|
+
if (diag == CblasNonUnit) {
|
225
|
+
DType temp= 1 / a[k + k * lda];
|
226
|
+
for (int i = 0; i < m; ++i) {
|
227
|
+
b[i + k * ldb] = temp * b[i + k * ldb];
|
228
|
+
}
|
229
|
+
}
|
230
|
+
for (int j = 0; j < k-1; ++j) {
|
231
|
+
if (a[j + k * lda] != 0.) {
|
232
|
+
DType temp= a[j + k * lda];
|
233
|
+
for (int i = 0; i < m; ++i) {
|
234
|
+
b[i + j * ldb] -= temp * b[i + k * ldb];
|
235
|
+
}
|
236
|
+
}
|
237
|
+
}
|
238
|
+
if (alpha != 1) {
|
239
|
+
for (int i = 0; i < m; ++i) {
|
240
|
+
b[i + k * ldb] = alpha * b[i + k * ldb];
|
241
|
+
}
|
242
|
+
}
|
243
|
+
}
|
244
|
+
} else {
|
245
|
+
for (int k = 0; k < n; ++k) {
|
246
|
+
if (diag == CblasNonUnit) {
|
247
|
+
DType temp = 1 / a[k + k * lda];
|
248
|
+
for (int i = 0; i < m; ++i) {
|
249
|
+
b[i + k * ldb] = temp * b[i + k * ldb];
|
250
|
+
}
|
251
|
+
}
|
252
|
+
for (int j = k+1; j < n; ++j) {
|
253
|
+
if (a[j + k * lda] != 0.) {
|
254
|
+
DType temp = a[j + k * lda];
|
255
|
+
for (int i = 0; i < m; ++i) {
|
256
|
+
b[i + j * ldb] -= temp * b[i + k * ldb];
|
257
|
+
}
|
258
|
+
}
|
259
|
+
}
|
260
|
+
if (alpha != 1) {
|
261
|
+
for (int i = 0; i < m; ++i) {
|
262
|
+
b[i + k * ldb] = alpha * b[i + k * ldb];
|
263
|
+
}
|
264
|
+
}
|
265
|
+
}
|
266
|
+
}
|
267
|
+
}
|
268
268
|
}
|
269
269
|
}
|
270
270
|
|
data/ext/nmatrix/nmatrix.h
CHANGED
@@ -57,6 +57,33 @@
|
|
57
57
|
#include "nm_memory.h"
|
58
58
|
#endif
|
59
59
|
|
60
|
+
#ifndef FIX_CONST_VALUE_PTR
|
61
|
+
# if defined(__fcc__) || defined(__fcc_version) || \
|
62
|
+
defined(__FCC__) || defined(__FCC_VERSION)
|
63
|
+
/* workaround for old version of Fujitsu C Compiler (fcc) */
|
64
|
+
# define FIX_CONST_VALUE_PTR(x) ((const VALUE *)(x))
|
65
|
+
# else
|
66
|
+
# define FIX_CONST_VALUE_PTR(x) (x)
|
67
|
+
# endif
|
68
|
+
#endif
|
69
|
+
|
70
|
+
#ifndef HAVE_RB_ARRAY_CONST_PTR
|
71
|
+
static inline const VALUE *
|
72
|
+
rb_array_const_ptr(VALUE a)
|
73
|
+
{
|
74
|
+
return FIX_CONST_VALUE_PTR((RBASIC(a)->flags & RARRAY_EMBED_FLAG) ?
|
75
|
+
RARRAY(a)->as.ary : RARRAY(a)->as.heap.ptr);
|
76
|
+
}
|
77
|
+
#endif
|
78
|
+
|
79
|
+
#ifndef RARRAY_CONST_PTR
|
80
|
+
# define RARRAY_CONST_PTR(a) rb_array_const_ptr(a)
|
81
|
+
#endif
|
82
|
+
|
83
|
+
#ifndef RARRAY_AREF
|
84
|
+
# define RARRAY_AREF(a, i) (RARRAY_CONST_PTR(a)[i])
|
85
|
+
#endif
|
86
|
+
|
60
87
|
/*
|
61
88
|
* Macros
|
62
89
|
*/
|
@@ -323,7 +350,10 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER); // };
|
|
323
350
|
#define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
|
324
351
|
#define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
|
325
352
|
|
353
|
+
// Number of elements in a dense nmatrix.
|
326
354
|
#define NM_DENSE_COUNT(val) (nm_storage_count_max_elements(NM_STORAGE_DENSE(val)))
|
355
|
+
|
356
|
+
// Get a pointer to the array that stores elements in a dense matrix.
|
327
357
|
#define NM_DENSE_ELEMENTS(val) (NM_STORAGE_DENSE(val)->elements)
|
328
358
|
#define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
|
329
359
|
#define NM_REF(val,slice) (RefFuncs[NM_STYPE(val)]( NM_STORAGE(val), slice, NM_SIZEOF_DTYPE(val) ))
|
data/ext/nmatrix/ruby_constants.h
CHANGED
@@ -45,57 +45,57 @@ extern ID nm_rb_dtype,
|
|
45
45
|
nm_rb_default,
|
46
46
|
|
47
47
|
nm_rb_real,
|
48
|
-
|
48
|
+
nm_rb_imag,
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
nm_rb_numer,
|
51
|
+
nm_rb_denom,
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
nm_rb_complex_conjugate,
|
54
|
+
nm_rb_transpose,
|
55
|
+
nm_rb_no_transpose,
|
56
|
+
nm_rb_left,
|
57
|
+
nm_rb_right,
|
58
|
+
nm_rb_upper,
|
59
|
+
nm_rb_lower,
|
60
|
+
nm_rb_unit,
|
61
|
+
nm_rb_nonunit,
|
62
62
|
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
nm_rb_dense,
|
64
|
+
nm_rb_list,
|
65
|
+
nm_rb_yale,
|
66
66
|
|
67
67
|
nm_rb_row,
|
68
68
|
nm_rb_column,
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
70
|
+
nm_rb_add,
|
71
|
+
nm_rb_sub,
|
72
|
+
nm_rb_mul,
|
73
|
+
nm_rb_div,
|
74
74
|
|
75
|
-
|
75
|
+
nm_rb_negate,
|
76
76
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
77
|
+
nm_rb_percent,
|
78
|
+
nm_rb_gt,
|
79
|
+
nm_rb_lt,
|
80
|
+
nm_rb_eql,
|
81
|
+
nm_rb_neql,
|
82
|
+
nm_rb_gte,
|
83
|
+
nm_rb_lte,
|
84
84
|
|
85
|
-
|
85
|
+
nm_rb_hash;
|
86
86
|
|
87
|
-
extern VALUE
|
87
|
+
extern VALUE cNMatrix,
|
88
88
|
cNMatrix_IO,
|
89
89
|
cNMatrix_IO_Matlab,
|
90
|
-
|
90
|
+
cNMatrix_YaleFunctions,
|
91
91
|
|
92
|
-
|
92
|
+
cNMatrix_GC_holder,
|
93
93
|
|
94
|
-
|
94
|
+
nm_eDataTypeError,
|
95
95
|
nm_eConvergenceError,
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
nm_eStorageTypeError,
|
97
|
+
nm_eShapeError,
|
98
|
+
nm_eNotInvertibleError;
|
99
99
|
|
100
100
|
/*
|
101
101
|
* Functions
|