nmatrix 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +183 -159
- data/ext/nmatrix/data/data.cpp +113 -112
- data/ext/nmatrix/data/data.h +306 -292
- data/ext/nmatrix/data/ruby_object.h +193 -193
- data/ext/nmatrix/extconf.rb +11 -9
- data/ext/nmatrix/math.cpp +9 -11
- data/ext/nmatrix/math/math.h +3 -2
- data/ext/nmatrix/math/trsm.h +152 -152
- data/ext/nmatrix/nmatrix.h +30 -0
- data/ext/nmatrix/ruby_constants.cpp +67 -67
- data/ext/nmatrix/ruby_constants.h +35 -35
- data/ext/nmatrix/ruby_nmatrix.c +168 -183
- data/ext/nmatrix/storage/common.h +4 -3
- data/ext/nmatrix/storage/dense/dense.cpp +50 -50
- data/ext/nmatrix/storage/dense/dense.h +8 -7
- data/ext/nmatrix/storage/list/list.cpp +16 -16
- data/ext/nmatrix/storage/list/list.h +7 -6
- data/ext/nmatrix/storage/storage.cpp +32 -32
- data/ext/nmatrix/storage/storage.h +12 -11
- data/ext/nmatrix/storage/yale/class.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/base.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/iterator.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +1 -0
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +2 -1
- data/ext/nmatrix/storage/yale/yale.cpp +27 -27
- data/ext/nmatrix/storage/yale/yale.h +7 -6
- data/ext/nmatrix/ttable_helper.rb +10 -10
- data/ext/nmatrix/types.h +3 -2
- data/ext/nmatrix/util/io.cpp +7 -7
- data/ext/nmatrix/util/sl_list.cpp +26 -26
- data/ext/nmatrix/util/sl_list.h +19 -18
- data/lib/nmatrix/blas.rb +7 -7
- data/lib/nmatrix/io/mat5_reader.rb +30 -30
- data/lib/nmatrix/math.rb +73 -17
- data/lib/nmatrix/nmatrix.rb +10 -8
- data/lib/nmatrix/shortcuts.rb +3 -3
- data/lib/nmatrix/version.rb +3 -3
- data/spec/00_nmatrix_spec.rb +6 -0
- data/spec/math_spec.rb +77 -0
- data/spec/spec_helper.rb +9 -0
- metadata +2 -2
data/ext/nmatrix/extconf.rb
CHANGED
@@ -105,9 +105,6 @@ basenames = %w{nmatrix ruby_constants data/data util/io math util/sl_list storag
|
|
105
105
|
$objs = basenames.map { |b| "#{b}.o" }
|
106
106
|
$srcs = basenames.map { |b| "#{b}.cpp" }
|
107
107
|
|
108
|
-
#CONFIG['CXX'] = 'clang++'
|
109
|
-
CONFIG['CXX'] = 'g++'
|
110
|
-
|
111
108
|
def find_newer_gplusplus #:nodoc:
|
112
109
|
print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
|
113
110
|
[9,8,7,6,5,4,3].each do |minor|
|
@@ -135,7 +132,7 @@ end
|
|
135
132
|
|
136
133
|
|
137
134
|
if CONFIG['CXX'] == 'clang++'
|
138
|
-
$
|
135
|
+
$CXX_STANDARD = 'c++11'
|
139
136
|
|
140
137
|
else
|
141
138
|
version = gplusplus_version
|
@@ -147,11 +144,11 @@ else
|
|
147
144
|
end
|
148
145
|
|
149
146
|
if version < '4.7.0'
|
150
|
-
$
|
147
|
+
$CXX_STANDARD = 'c++0x'
|
151
148
|
else
|
152
|
-
$
|
149
|
+
$CXX_STANDARD = 'c++11'
|
153
150
|
end
|
154
|
-
puts "using C++ standard... #{$
|
151
|
+
puts "using C++ standard... #{$CXX_STANDARD}"
|
155
152
|
puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
|
156
153
|
end
|
157
154
|
|
@@ -160,13 +157,18 @@ end
|
|
160
157
|
# For release, these next two should both be changed to -O3.
|
161
158
|
$CFLAGS += " -O3 "
|
162
159
|
#$CFLAGS += " -static -O0 -g "
|
163
|
-
$
|
164
|
-
#$
|
160
|
+
$CXXFLAGS += " -O3 -std=#{$CXX_STANDARD} " #-fmax-errors=10 -save-temps
|
161
|
+
#$CXXFLAGS += " -static -O0 -g -std=#{$CXX_STANDARD} "
|
165
162
|
|
166
163
|
CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
|
167
164
|
CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '')
|
168
165
|
CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
|
169
166
|
|
167
|
+
have_func("rb_array_const_ptr", "ruby.h")
|
168
|
+
have_macro("FIX_CONST_VALUE_PTR", "ruby.h")
|
169
|
+
have_macro("RARRAY_CONST_PTR", "ruby.h")
|
170
|
+
have_macro("RARRAY_AREF", "ruby.h")
|
171
|
+
|
170
172
|
create_conf_h("nmatrix_config.h")
|
171
173
|
create_makefile("nmatrix")
|
172
174
|
|
data/ext/nmatrix/math.cpp
CHANGED
@@ -126,6 +126,7 @@
|
|
126
126
|
*/
|
127
127
|
|
128
128
|
|
129
|
+
#include <ruby.h>
|
129
130
|
#include <algorithm>
|
130
131
|
#include <limits>
|
131
132
|
#include <cmath>
|
@@ -232,8 +233,8 @@ namespace nm {
|
|
232
233
|
template <typename DType>
|
233
234
|
void inverse(const int M, void* a_elements) {
|
234
235
|
DType* matrix = reinterpret_cast<DType*>(a_elements);
|
235
|
-
int* row_index = new int[M]; // arrays for keeping track of column scrambling
|
236
|
-
int* col_index = new int[M];
|
236
|
+
int row_index[M]; // arrays for keeping track of column scrambling
|
237
|
+
int col_index[M];
|
237
238
|
|
238
239
|
for (int k = 0;k < M; ++k) {
|
239
240
|
DType akk = std::abs( matrix[k * (M + 1)] ) ; // diagonal element
|
@@ -294,9 +295,6 @@ namespace nm {
|
|
294
295
|
}
|
295
296
|
}
|
296
297
|
}
|
297
|
-
|
298
|
-
delete[] row_index;
|
299
|
-
delete[] col_index;
|
300
298
|
}
|
301
299
|
|
302
300
|
/*
|
@@ -599,8 +597,8 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
|
|
599
597
|
rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
|
600
598
|
rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
|
601
599
|
} else {
|
602
|
-
rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
|
603
|
-
rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
|
600
|
+
rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
|
601
|
+
rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
|
604
602
|
}
|
605
603
|
NM_CONSERVATIVE(nm_unregister_value(&ab));
|
606
604
|
NM_CONSERVATIVE(nm_unregister_value(&self));
|
@@ -724,7 +722,7 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
724
722
|
|
725
723
|
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
|
726
724
|
|
727
|
-
return rubyobj_from_cval(Result, rdtype).rval;
|
725
|
+
return nm::rubyobj_from_cval(Result, rdtype).rval;
|
728
726
|
}
|
729
727
|
}
|
730
728
|
|
@@ -773,7 +771,7 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
|
|
773
771
|
|
774
772
|
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
|
775
773
|
|
776
|
-
return rubyobj_from_cval(Result, rdtype).rval;
|
774
|
+
return nm::rubyobj_from_cval(Result, rdtype).rval;
|
777
775
|
}
|
778
776
|
|
779
777
|
/*
|
@@ -1005,7 +1003,7 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
|
|
1005
1003
|
} else {
|
1006
1004
|
ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
|
1007
1005
|
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
1008
|
-
ipiv_[index] = FIX2INT(
|
1006
|
+
ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
|
1009
1007
|
}
|
1010
1008
|
}
|
1011
1009
|
|
@@ -1057,7 +1055,7 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
|
|
1057
1055
|
} else {
|
1058
1056
|
ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
|
1059
1057
|
for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
|
1060
|
-
ipiv_[index] = FIX2INT(
|
1058
|
+
ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
|
1061
1059
|
}
|
1062
1060
|
}
|
1063
1061
|
|
data/ext/nmatrix/math/math.h
CHANGED
@@ -72,6 +72,7 @@
|
|
72
72
|
|
73
73
|
#include <algorithm> // std::min, std::max
|
74
74
|
#include <limits> // std::numeric_limits
|
75
|
+
#include <memory> // std::unique_ptr
|
75
76
|
|
76
77
|
/*
|
77
78
|
* Project Includes
|
@@ -123,8 +124,8 @@ template <typename DType>
|
|
123
124
|
inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
|
124
125
|
const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
|
125
126
|
const unsigned int max_lmn = std::max(std::max(m, n), l);
|
126
|
-
IType next[max_lmn];
|
127
|
-
DType sums[max_lmn];
|
127
|
+
std::unique_ptr<IType[]> next(new IType[max_lmn]);
|
128
|
+
std::unique_ptr<DType[]> sums(new DType[max_lmn]);
|
128
129
|
|
129
130
|
DType v;
|
130
131
|
|
data/ext/nmatrix/math/trsm.h
CHANGED
@@ -88,183 +88,183 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
88
88
|
b[i + j * ldb] = 0;
|
89
89
|
}
|
90
90
|
}
|
91
|
-
|
91
|
+
return;
|
92
92
|
}
|
93
93
|
|
94
94
|
if (side == CblasLeft) {
|
95
|
-
|
95
|
+
if (trans_a == CblasNoTrans) {
|
96
96
|
|
97
97
|
/* Form B := alpha*inv( A )*B. */
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
98
|
+
if (uplo == CblasUpper) {
|
99
|
+
for (int j = 0; j < n; ++j) {
|
100
|
+
if (alpha != 1) {
|
101
|
+
for (int i = 0; i < m; ++i) {
|
102
|
+
b[i + j * ldb] = alpha * b[i + j * ldb];
|
103
|
+
}
|
104
|
+
}
|
105
|
+
for (int k = m-1; k >= 0; --k) {
|
106
|
+
if (b[k + j * ldb] != 0) {
|
107
|
+
if (diag == CblasNonUnit) {
|
108
|
+
b[k + j * ldb] /= a[k + k * lda];
|
109
|
+
}
|
110
110
|
|
111
111
|
for (int i = 0; i < k-1; ++i) {
|
112
112
|
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
113
113
|
}
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
117
|
+
} else {
|
118
|
+
for (int j = 0; j < n; ++j) {
|
119
|
+
if (alpha != 1) {
|
120
120
|
for (int i = 0; i < m; ++i) {
|
121
121
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
122
|
+
}
|
123
|
+
}
|
124
|
+
for (int k = 0; k < m; ++k) {
|
125
|
+
if (b[k + j * ldb] != 0.) {
|
126
|
+
if (diag == CblasNonUnit) {
|
127
|
+
b[k + j * ldb] /= a[k + k * lda];
|
128
|
+
}
|
129
|
+
for (int i = k+1; i < m; ++i) {
|
130
|
+
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
}
|
135
|
+
}
|
136
|
+
} else { // CblasTrans
|
137
137
|
|
138
138
|
/* Form B := alpha*inv( A**T )*B. */
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
139
|
+
if (uplo == CblasUpper) {
|
140
|
+
for (int j = 0; j < n; ++j) {
|
141
|
+
for (int i = 0; i < m; ++i) {
|
142
|
+
DType temp = alpha * b[i + j * ldb];
|
143
143
|
for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
|
144
144
|
temp -= a[k + i * lda] * b[k + j * ldb];
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
145
|
+
}
|
146
|
+
if (diag == CblasNonUnit) {
|
147
|
+
temp /= a[i + i * lda];
|
148
|
+
}
|
149
|
+
b[i + j * ldb] = temp;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
} else {
|
153
|
+
for (int j = 0; j < n; ++j) {
|
154
|
+
for (int i = m-1; i >= 0; --i) {
|
155
|
+
DType temp= alpha * b[i + j * ldb];
|
156
|
+
for (int k = i+1; k < m; ++k) {
|
157
|
+
temp -= a[k + i * lda] * b[k + j * ldb];
|
158
|
+
}
|
159
|
+
if (diag == CblasNonUnit) {
|
160
|
+
temp /= a[i + i * lda];
|
161
|
+
}
|
162
|
+
b[i + j * ldb] = temp;
|
163
|
+
}
|
164
|
+
}
|
165
|
+
}
|
166
|
+
}
|
167
167
|
} else { // right side
|
168
168
|
|
169
|
-
|
169
|
+
if (trans_a == CblasNoTrans) {
|
170
170
|
|
171
171
|
/* Form B := alpha*B*inv( A ). */
|
172
172
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
173
|
+
if (uplo == CblasUpper) {
|
174
|
+
for (int j = 0; j < n; ++j) {
|
175
|
+
if (alpha != 1) {
|
176
|
+
for (int i = 0; i < m; ++i) {
|
177
|
+
b[i + j * ldb] = alpha * b[i + j * ldb];
|
178
|
+
}
|
179
|
+
}
|
180
|
+
for (int k = 0; k < j-1; ++k) {
|
181
|
+
if (a[k + j * lda] != 0) {
|
182
|
+
for (int i = 0; i < m; ++i) {
|
183
|
+
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
184
|
+
}
|
185
|
+
}
|
186
|
+
}
|
187
|
+
if (diag == CblasNonUnit) {
|
188
|
+
DType temp = 1 / a[j + j * lda];
|
189
|
+
for (int i = 0; i < m; ++i) {
|
190
|
+
b[i + j * ldb] = temp * b[i + j * ldb];
|
191
|
+
}
|
192
|
+
}
|
193
|
+
}
|
194
|
+
} else {
|
195
|
+
for (int j = n-1; j >= 0; --j) {
|
196
|
+
if (alpha != 1) {
|
197
|
+
for (int i = 0; i < m; ++i) {
|
198
|
+
b[i + j * ldb] = alpha * b[i + j * ldb];
|
199
|
+
}
|
200
|
+
}
|
201
201
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
202
|
+
for (int k = j+1; k < n; ++k) {
|
203
|
+
if (a[k + j * lda] != 0.) {
|
204
|
+
for (int i = 0; i < m; ++i) {
|
205
|
+
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
206
|
+
}
|
207
|
+
}
|
208
|
+
}
|
209
|
+
if (diag == CblasNonUnit) {
|
210
|
+
DType temp = 1 / a[j + j * lda];
|
211
211
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
212
|
+
for (int i = 0; i < m; ++i) {
|
213
|
+
b[i + j * ldb] = temp * b[i + j * ldb];
|
214
|
+
}
|
215
|
+
}
|
216
|
+
}
|
217
|
+
}
|
218
|
+
} else { // CblasTrans
|
219
219
|
|
220
220
|
/* Form B := alpha*B*inv( A**T ). */
|
221
221
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
222
|
+
if (uplo == CblasUpper) {
|
223
|
+
for (int k = n-1; k >= 0; --k) {
|
224
|
+
if (diag == CblasNonUnit) {
|
225
|
+
DType temp= 1 / a[k + k * lda];
|
226
|
+
for (int i = 0; i < m; ++i) {
|
227
|
+
b[i + k * ldb] = temp * b[i + k * ldb];
|
228
|
+
}
|
229
|
+
}
|
230
|
+
for (int j = 0; j < k-1; ++j) {
|
231
|
+
if (a[j + k * lda] != 0.) {
|
232
|
+
DType temp= a[j + k * lda];
|
233
|
+
for (int i = 0; i < m; ++i) {
|
234
|
+
b[i + j * ldb] -= temp * b[i + k * ldb];
|
235
|
+
}
|
236
|
+
}
|
237
|
+
}
|
238
|
+
if (alpha != 1) {
|
239
|
+
for (int i = 0; i < m; ++i) {
|
240
|
+
b[i + k * ldb] = alpha * b[i + k * ldb];
|
241
|
+
}
|
242
|
+
}
|
243
|
+
}
|
244
|
+
} else {
|
245
|
+
for (int k = 0; k < n; ++k) {
|
246
|
+
if (diag == CblasNonUnit) {
|
247
|
+
DType temp = 1 / a[k + k * lda];
|
248
|
+
for (int i = 0; i < m; ++i) {
|
249
|
+
b[i + k * ldb] = temp * b[i + k * ldb];
|
250
|
+
}
|
251
|
+
}
|
252
|
+
for (int j = k+1; j < n; ++j) {
|
253
|
+
if (a[j + k * lda] != 0.) {
|
254
|
+
DType temp = a[j + k * lda];
|
255
|
+
for (int i = 0; i < m; ++i) {
|
256
|
+
b[i + j * ldb] -= temp * b[i + k * ldb];
|
257
|
+
}
|
258
|
+
}
|
259
|
+
}
|
260
|
+
if (alpha != 1) {
|
261
|
+
for (int i = 0; i < m; ++i) {
|
262
|
+
b[i + k * ldb] = alpha * b[i + k * ldb];
|
263
|
+
}
|
264
|
+
}
|
265
|
+
}
|
266
|
+
}
|
267
|
+
}
|
268
268
|
}
|
269
269
|
}
|
270
270
|
|