nmatrix 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +102 -10
- data/README.rdoc +24 -32
- data/Rakefile +1 -1
- data/ext/nmatrix/data/complex.h +9 -0
- data/ext/nmatrix/data/data.cpp +78 -4
- data/ext/nmatrix/data/data.h +86 -54
- data/ext/nmatrix/data/rational.h +2 -0
- data/ext/nmatrix/data/ruby_object.h +38 -8
- data/ext/nmatrix/extconf.rb +13 -7
- data/ext/nmatrix/nmatrix.cpp +262 -139
- data/ext/nmatrix/nmatrix.h +11 -4
- data/ext/nmatrix/storage/common.cpp +20 -13
- data/ext/nmatrix/storage/common.h +18 -12
- data/ext/nmatrix/storage/dense.cpp +122 -192
- data/ext/nmatrix/storage/dense.h +4 -2
- data/ext/nmatrix/storage/list.cpp +467 -636
- data/ext/nmatrix/storage/list.h +6 -3
- data/ext/nmatrix/storage/storage.cpp +83 -46
- data/ext/nmatrix/storage/storage.h +7 -7
- data/ext/nmatrix/storage/yale.cpp +621 -361
- data/ext/nmatrix/storage/yale.h +21 -9
- data/ext/nmatrix/ttable_helper.rb +27 -31
- data/ext/nmatrix/types.h +1 -1
- data/ext/nmatrix/util/math.cpp +9 -10
- data/ext/nmatrix/util/sl_list.cpp +1 -7
- data/ext/nmatrix/util/sl_list.h +0 -118
- data/lib/nmatrix/blas.rb +59 -18
- data/lib/nmatrix/monkeys.rb +0 -52
- data/lib/nmatrix/nmatrix.rb +136 -9
- data/lib/nmatrix/nvector.rb +33 -0
- data/lib/nmatrix/shortcuts.rb +95 -16
- data/lib/nmatrix/version.rb +1 -1
- data/lib/nmatrix/yale_functions.rb +25 -19
- data/spec/blas_spec.rb +1 -19
- data/spec/elementwise_spec.rb +132 -17
- data/spec/lapack_spec.rb +0 -3
- data/spec/nmatrix_list_spec.rb +18 -0
- data/spec/nmatrix_spec.rb +44 -18
- data/spec/nmatrix_yale_spec.rb +1 -3
- data/spec/shortcuts_spec.rb +26 -36
- data/spec/slice_spec.rb +2 -4
- metadata +2 -2
data/ext/nmatrix/nmatrix.h
CHANGED
@@ -136,10 +136,10 @@
|
|
136
136
|
|
137
137
|
#define NM_DEF_STORAGE_ELEMENTS \
|
138
138
|
NM_DECL_ENUM(dtype_t, dtype); \
|
139
|
-
size_t
|
140
|
-
size_t*
|
141
|
-
size_t*
|
142
|
-
int
|
139
|
+
size_t dim; \
|
140
|
+
size_t* shape; \
|
141
|
+
size_t* offset; \
|
142
|
+
int count; \
|
143
143
|
STORAGE* src;
|
144
144
|
|
145
145
|
#define NM_DEF_STORAGE_CHILD_STRUCT_PRE(name) struct name : STORAGE {
|
@@ -312,6 +312,7 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
|
|
312
312
|
#define NM_SHAPE(val,i) (NM_STORAGE(val)->shape[(i)])
|
313
313
|
#define NM_SHAPE0(val) (NM_STORAGE(val)->shape[0])
|
314
314
|
#define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
|
315
|
+
#define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
|
315
316
|
|
316
317
|
#define NM_DENSE_COUNT(val) (storage_count_max_elements(NM_STORAGE_DENSE(val)))
|
317
318
|
#define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
|
@@ -355,6 +356,12 @@ extern "C" {
|
|
355
356
|
VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
|
356
357
|
|
357
358
|
NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
|
359
|
+
NM_DECL_ENUM(dtype_t, nm_dtype_min(VALUE));
|
360
|
+
|
361
|
+
// Non-API functions needed by other cpp files.
|
362
|
+
NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage);
|
363
|
+
void nm_delete(NMATRIX* mat);
|
364
|
+
void nm_delete_ref(NMATRIX* mat);
|
358
365
|
|
359
366
|
#ifdef __cplusplus
|
360
367
|
}
|
@@ -52,19 +52,26 @@
|
|
52
52
|
*/
|
53
53
|
|
54
54
|
extern "C" {
|
55
|
-
/*
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
size_t nm_storage_count_max_elements(const STORAGE* storage) {
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
55
|
+
/*
|
56
|
+
* Calculate the number of elements in the dense storage structure, based on
|
57
|
+
* shape and dim.
|
58
|
+
*/
|
59
|
+
size_t nm_storage_count_max_elements(const STORAGE* storage) {
|
60
|
+
unsigned int i;
|
61
|
+
size_t count = 1;
|
62
|
+
|
63
|
+
for (i = storage->dim; i-- > 0;) {
|
64
|
+
count *= storage->shape[i];
|
65
|
+
}
|
66
|
+
|
67
|
+
return count;
|
68
|
+
}
|
69
|
+
|
70
|
+
// Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
|
71
|
+
// the matrix's storage.
|
72
|
+
VALUE nm_enumerator_length(VALUE nmatrix) {
|
73
|
+
long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
|
74
|
+
return LONG2NUM(len);
|
65
75
|
}
|
66
|
-
|
67
|
-
return count;
|
68
|
-
}
|
69
76
|
|
70
77
|
} // end of extern "C" block
|
@@ -32,6 +32,8 @@
|
|
32
32
|
* Standard Includes
|
33
33
|
*/
|
34
34
|
|
35
|
+
#include <cmath> // pow().
|
36
|
+
|
35
37
|
/*
|
36
38
|
* Project Includes
|
37
39
|
*/
|
@@ -41,7 +43,7 @@
|
|
41
43
|
/*
|
42
44
|
* Macros
|
43
45
|
*/
|
44
|
-
|
46
|
+
|
45
47
|
extern "C" {
|
46
48
|
|
47
49
|
/*
|
@@ -69,6 +71,7 @@ struct SLICE {
|
|
69
71
|
*/
|
70
72
|
|
71
73
|
size_t nm_storage_count_max_elements(const STORAGE* storage);
|
74
|
+
VALUE nm_enumerator_length(VALUE nmatrix);
|
72
75
|
|
73
76
|
} // end of extern "C" block
|
74
77
|
|
@@ -78,19 +81,22 @@ namespace nm {
|
|
78
81
|
* Templated helper function for element-wise operations, used by dense, yale, and list.
|
79
82
|
*/
|
80
83
|
template <ewop_t op, typename LDType, typename RDType>
|
81
|
-
inline
|
84
|
+
inline VALUE ew_op_switch(LDType left, RDType right) {
|
82
85
|
switch (op) {
|
83
86
|
case EW_ADD:
|
84
|
-
return left + right;
|
87
|
+
return RubyObject(left + right).rval;
|
85
88
|
|
86
89
|
case EW_SUB:
|
87
|
-
return left - right;
|
90
|
+
return RubyObject(left - right).rval;
|
88
91
|
|
89
92
|
case EW_MUL:
|
90
|
-
return left * right;
|
93
|
+
return RubyObject(left * right).rval;
|
91
94
|
|
92
95
|
case EW_DIV:
|
93
|
-
return left / right;
|
96
|
+
return RubyObject(left / right).rval;
|
97
|
+
|
98
|
+
case EW_POW:
|
99
|
+
return RubyObject(pow(left, right)).rval;
|
94
100
|
|
95
101
|
case EW_MOD:
|
96
102
|
rb_raise(rb_eNotImpError, "Element-wise modulo is currently not supported.");
|
@@ -99,11 +105,11 @@ namespace nm {
|
|
99
105
|
default:
|
100
106
|
rb_raise(rb_eStandardError, "This should not happen.");
|
101
107
|
}
|
102
|
-
return
|
108
|
+
return Qnil;
|
103
109
|
}
|
104
110
|
|
105
111
|
#define EWOP_INT_INT_DIV(ltype, rtype) template <> \
|
106
|
-
inline
|
112
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
107
113
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
108
114
|
if ((left > 0 && right > 0) || (left < 0 && right < 0)) \
|
109
115
|
return left / right; \
|
@@ -112,27 +118,27 @@ namespace nm {
|
|
112
118
|
}
|
113
119
|
|
114
120
|
#define EWOP_UINT_UINT_DIV(ltype, rtype) template <> \
|
115
|
-
inline
|
121
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
116
122
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
117
123
|
return left / right; \
|
118
124
|
}
|
119
125
|
|
120
126
|
#define EWOP_INT_UINT_DIV(ltype, rtype) template <> \
|
121
|
-
inline
|
127
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
122
128
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
123
129
|
if (left > 0 ) return left / right; \
|
124
130
|
else return ( ltype )(std::floor((double)(left) / (double)(right))); \
|
125
131
|
}
|
126
132
|
|
127
133
|
#define EWOP_UINT_INT_DIV(ltype, rtype) template <> \
|
128
|
-
inline
|
134
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
129
135
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
130
136
|
if (right > 0) return left / right; \
|
131
137
|
else return ( ltype )(std::floor((double)(left) / (double)(right))); \
|
132
138
|
}
|
133
139
|
|
134
140
|
#define EWOP_FLOAT_INT_DIV(ltype, rtype) template <> \
|
135
|
-
inline
|
141
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
136
142
|
return left / (ltype)(right); \
|
137
143
|
}
|
138
144
|
|
@@ -60,12 +60,9 @@ namespace nm { namespace dense_storage {
|
|
60
60
|
|
61
61
|
template <typename LDType, typename RDType>
|
62
62
|
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
63
|
-
|
63
|
+
|
64
64
|
template <typename LDType, typename RDType>
|
65
65
|
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
|
66
|
-
|
67
|
-
template <ewop_t op, typename LDType, typename RDType>
|
68
|
-
static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar);
|
69
66
|
|
70
67
|
template <typename DType>
|
71
68
|
static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
|
@@ -132,7 +129,7 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
132
129
|
|
133
130
|
if (elements_length == count) {
|
134
131
|
s->elements = elements;
|
135
|
-
|
132
|
+
|
136
133
|
} else {
|
137
134
|
s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
|
138
135
|
|
@@ -145,12 +142,12 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
145
142
|
if (i + elements_length > count) {
|
146
143
|
copy_length = count - i;
|
147
144
|
}
|
148
|
-
|
145
|
+
|
149
146
|
memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
|
150
147
|
}
|
151
148
|
|
152
149
|
// Get rid of the init_val.
|
153
|
-
|
150
|
+
xfree(elements);
|
154
151
|
}
|
155
152
|
}
|
156
153
|
|
@@ -210,20 +207,85 @@ void nm_dense_storage_mark(void* storage_base) {
|
|
210
207
|
///////////////
|
211
208
|
|
212
209
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
210
|
+
|
211
|
+
/*
|
212
|
+
* map_pair iterator for dense matrices (for element-wise operations)
|
213
|
+
*/
|
214
|
+
VALUE nm_dense_map_pair(VALUE self, VALUE right) {
|
215
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
|
216
|
+
*t = NM_STORAGE_DENSE(right);
|
217
|
+
|
218
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
219
|
+
|
220
|
+
size_t* coords = ALLOCA_N(size_t, s->dim);
|
221
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
222
|
+
|
223
|
+
size_t *shape_copy = ALLOC_N(size_t, s->dim);
|
224
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
225
|
+
|
226
|
+
size_t count = nm_storage_count_max_elements(s);
|
227
|
+
|
228
|
+
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
229
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
230
|
+
|
231
|
+
for (size_t k = 0; k < count; ++k) {
|
232
|
+
nm_dense_storage_coords(result, k, coords);
|
233
|
+
size_t s_index = nm_dense_storage_pos(s, coords),
|
234
|
+
t_index = nm_dense_storage_pos(t, coords);
|
235
|
+
|
236
|
+
VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
|
237
|
+
VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
|
238
|
+
|
239
|
+
result_elem[k] = rb_yield_values(2, sval, tval);
|
240
|
+
}
|
241
|
+
|
242
|
+
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
243
|
+
|
244
|
+
return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
|
245
|
+
}
|
246
|
+
|
247
|
+
|
248
|
+
/*
|
249
|
+
* map enumerator for dense matrices.
|
250
|
+
*/
|
251
|
+
VALUE nm_dense_map(VALUE self) {
|
252
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
|
253
|
+
|
254
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
255
|
+
|
256
|
+
size_t* coords = ALLOCA_N(size_t, s->dim);
|
257
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
258
|
+
|
259
|
+
size_t *shape_copy = ALLOC_N(size_t, s->dim);
|
260
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
261
|
+
|
262
|
+
size_t count = nm_storage_count_max_elements(s);
|
263
|
+
|
264
|
+
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
265
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
266
|
+
|
267
|
+
for (size_t k = 0; k < count; ++k) {
|
268
|
+
nm_dense_storage_coords(result, k, coords);
|
269
|
+
size_t s_index = nm_dense_storage_pos(s, coords);
|
270
|
+
|
271
|
+
result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
|
272
|
+
}
|
273
|
+
|
274
|
+
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
275
|
+
|
276
|
+
return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
|
218
277
|
}
|
219
278
|
|
220
279
|
|
280
|
+
/*
|
281
|
+
* each_with_indices iterator for dense matrices.
|
282
|
+
*/
|
221
283
|
VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
222
284
|
volatile VALUE nm = nmatrix;
|
223
285
|
|
224
286
|
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
225
287
|
|
226
|
-
RETURN_SIZED_ENUMERATOR(nm, 0, 0,
|
288
|
+
RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
|
227
289
|
|
228
290
|
// Create indices and initialize them to zero
|
229
291
|
size_t* coords = ALLOCA_N(size_t, s->dim);
|
@@ -269,7 +331,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
269
331
|
volatile VALUE nm = nmatrix; // Not sure this actually does anything.
|
270
332
|
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
271
333
|
|
272
|
-
RETURN_SIZED_ENUMERATOR(nm, 0, 0,
|
334
|
+
RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length);
|
273
335
|
|
274
336
|
size_t* temp_coords = ALLOCA_N(size_t, s->dim);
|
275
337
|
size_t sliced_index;
|
@@ -280,10 +342,11 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
280
342
|
if (NM_DTYPE(nm) == nm::RUBYOBJ) {
|
281
343
|
|
282
344
|
// matrix of Ruby objects -- yield those objects directly
|
283
|
-
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
|
345
|
+
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
284
346
|
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
285
347
|
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
286
348
|
rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
|
349
|
+
}
|
287
350
|
|
288
351
|
} else {
|
289
352
|
|
@@ -300,7 +363,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
300
363
|
nm_dense_storage_delete(sliced_dummy);
|
301
364
|
|
302
365
|
return nmatrix;
|
303
|
-
|
366
|
+
|
304
367
|
}
|
305
368
|
|
306
369
|
|
@@ -321,13 +384,13 @@ void* nm_dense_storage_get(STORAGE* storage, SLICE* slice) {
|
|
321
384
|
shape[i] = slice->lengths[i];
|
322
385
|
}
|
323
386
|
|
324
|
-
ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
|
387
|
+
ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
|
325
388
|
|
326
|
-
slice_copy(ns,
|
327
|
-
reinterpret_cast<const DENSE_STORAGE*>(s->src),
|
328
|
-
slice->lengths,
|
329
|
-
0,
|
330
|
-
nm_dense_storage_pos(s, slice->coords),
|
389
|
+
slice_copy(ns,
|
390
|
+
reinterpret_cast<const DENSE_STORAGE*>(s->src),
|
391
|
+
slice->lengths,
|
392
|
+
0,
|
393
|
+
nm_dense_storage_pos(s, slice->coords),
|
331
394
|
0);
|
332
395
|
return ns;
|
333
396
|
}
|
@@ -343,7 +406,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
|
|
343
406
|
|
344
407
|
if (slice->single)
|
345
408
|
return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
|
346
|
-
|
409
|
+
|
347
410
|
else {
|
348
411
|
DENSE_STORAGE* ns = ALLOC( DENSE_STORAGE );
|
349
412
|
ns->dim = s->dim;
|
@@ -358,7 +421,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
|
|
358
421
|
|
359
422
|
ns->stride = s->stride;
|
360
423
|
ns->elements = s->elements;
|
361
|
-
|
424
|
+
|
362
425
|
s->src->count++;
|
363
426
|
ns->src = s->src;
|
364
427
|
|
@@ -387,8 +450,13 @@ void nm_dense_storage_set(STORAGE* storage, SLICE* slice, void* val) {
|
|
387
450
|
* have the same dtype.
|
388
451
|
*/
|
389
452
|
bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
390
|
-
LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
|
391
|
-
|
453
|
+
LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
|
454
|
+
|
455
|
+
if (!ttable[left->dtype][right->dtype]) {
|
456
|
+
rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
|
457
|
+
return false;
|
458
|
+
}
|
459
|
+
|
392
460
|
return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
|
393
461
|
}
|
394
462
|
|
@@ -399,10 +467,10 @@ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
|
399
467
|
bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
400
468
|
if (mat->dtype == nm::COMPLEX64) {
|
401
469
|
return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
|
402
|
-
|
470
|
+
|
403
471
|
} else if (mat->dtype == nm::COMPLEX128) {
|
404
472
|
return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
|
405
|
-
|
473
|
+
|
406
474
|
} else {
|
407
475
|
return nm_dense_storage_is_symmetric(mat, lda);
|
408
476
|
}
|
@@ -413,7 +481,7 @@ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
|
413
481
|
*/
|
414
482
|
bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
415
483
|
DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
|
416
|
-
|
484
|
+
|
417
485
|
return ttable[mat->dtype](mat, lda);
|
418
486
|
}
|
419
487
|
|
@@ -421,24 +489,6 @@ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
421
489
|
// Math //
|
422
490
|
//////////
|
423
491
|
|
424
|
-
/*
|
425
|
-
* Dense matrix-matrix and matrix-scalar element-wise operations.
|
426
|
-
*
|
427
|
-
* right or rscalar should be NULL; they should not both be initialized. If right is NULL, it'll use the scalar value instead.
|
428
|
-
*/
|
429
|
-
STORAGE* nm_dense_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar) {
|
430
|
-
OP_LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::ew_op, DENSE_STORAGE*, const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void*);
|
431
|
-
|
432
|
-
if (right)
|
433
|
-
return ttable[op][left->dtype][right->dtype](reinterpret_cast<const DENSE_STORAGE*>(left), reinterpret_cast<const DENSE_STORAGE*>(right), NULL);
|
434
|
-
else {
|
435
|
-
nm::dtype_t r_dtype = nm_dtype_guess(scalar);
|
436
|
-
void* r_scalar = ALLOCA_N(char, DTYPE_SIZES[r_dtype]);
|
437
|
-
rubyval_to_cval(scalar, r_dtype, r_scalar);
|
438
|
-
|
439
|
-
return ttable[op][left->dtype][r_dtype](reinterpret_cast<const DENSE_STORAGE*>(left), NULL, r_scalar);
|
440
|
-
}
|
441
|
-
}
|
442
492
|
|
443
493
|
/*
|
444
494
|
* Dense matrix-matrix multiplication.
|
@@ -468,9 +518,9 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
|
|
468
518
|
}
|
469
519
|
|
470
520
|
/*
|
471
|
-
* Determine the a set of slice coordinates from linear array position (in elements
|
521
|
+
* Determine the a set of slice coordinates from linear array position (in elements
|
472
522
|
* of s) of some set of coordinates (given by slice). (Inverse of
|
473
|
-
* nm_dense_storage_pos).
|
523
|
+
* nm_dense_storage_pos).
|
474
524
|
*
|
475
525
|
* The parameter coords_out should be a pre-allocated array of size equal to s->dim.
|
476
526
|
*/
|
@@ -510,7 +560,7 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
510
560
|
for (size_t i = 0; i < lengths[n]; ++i) {
|
511
561
|
slice_copy(dest, src, lengths,
|
512
562
|
pdest + dest->stride[n]*i,
|
513
|
-
psrc + src->stride[n]*i,
|
563
|
+
psrc + src->stride[n]*i,
|
514
564
|
n + 1);
|
515
565
|
}
|
516
566
|
} else {
|
@@ -528,9 +578,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
528
578
|
/*
|
529
579
|
* Copy dense storage, changing dtype if necessary.
|
530
580
|
*/
|
531
|
-
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
|
581
|
+
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
|
532
582
|
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
533
583
|
|
584
|
+
if (!ttable[new_dtype][rhs->dtype]) {
|
585
|
+
rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
|
586
|
+
return NULL;
|
587
|
+
}
|
588
|
+
|
534
589
|
return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
|
535
590
|
}
|
536
591
|
|
@@ -538,7 +593,7 @@ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
|
|
538
593
|
* Copy dense storage without a change in dtype.
|
539
594
|
*/
|
540
595
|
DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
541
|
-
size_t count = 0;
|
596
|
+
size_t count = 0;
|
542
597
|
size_t *shape = ALLOC_N(size_t, rhs->dim);
|
543
598
|
|
544
599
|
// copy shape and offset
|
@@ -593,6 +648,10 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
|
|
593
648
|
nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
|
594
649
|
} else {
|
595
650
|
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
651
|
+
|
652
|
+
if (!ttable[lhs->dtype][rhs->dtype])
|
653
|
+
rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
|
654
|
+
|
596
655
|
ttable[lhs->dtype][rhs->dtype](rhs, lhs);
|
597
656
|
}
|
598
657
|
|
@@ -640,8 +699,7 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
|
640
699
|
|
641
700
|
// Ensure that allocation worked before copying.
|
642
701
|
if (lhs && count) {
|
643
|
-
if (rhs->src != rhs) {
|
644
|
-
/* Make a copy of a ref to a matrix. */
|
702
|
+
if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
|
645
703
|
|
646
704
|
DENSE_STORAGE* tmp = nm_dense_storage_copy(rhs);
|
647
705
|
|
@@ -650,13 +708,12 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
|
650
708
|
lhs_els[count] = tmp_els[count];
|
651
709
|
}
|
652
710
|
nm_dense_storage_delete(tmp);
|
653
|
-
} else {
|
654
|
-
/* Make a regular copy. */
|
655
711
|
|
712
|
+
} else { // Make a regular copy.
|
656
713
|
while (count-- > 0) lhs_els[count] = rhs_els[count];
|
657
714
|
}
|
658
715
|
}
|
659
|
-
|
716
|
+
|
660
717
|
return lhs;
|
661
718
|
}
|
662
719
|
|
@@ -673,7 +730,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
|
673
730
|
LDType* left_elements = (LDType*)left->elements;
|
674
731
|
RDType* right_elements = (RDType*)right->elements;
|
675
732
|
|
676
|
-
// Copy elements in temp matrix if you have
|
733
|
+
// Copy elements in temp matrix if you have reference to the right.
|
677
734
|
if (left->src != left) {
|
678
735
|
tmp1 = nm_dense_storage_copy(left);
|
679
736
|
left_elements = (LDType*)tmp1->elements;
|
@@ -682,7 +739,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
|
682
739
|
tmp2 = nm_dense_storage_copy(right);
|
683
740
|
right_elements = (RDType*)tmp2->elements;
|
684
741
|
}
|
685
|
-
|
742
|
+
|
686
743
|
|
687
744
|
|
688
745
|
for (index = nm_storage_count_max_elements(left); index-- > 0;) {
|
@@ -704,20 +761,20 @@ template <typename DType>
|
|
704
761
|
bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
705
762
|
unsigned int i, j;
|
706
763
|
register DType complex_conj;
|
707
|
-
|
764
|
+
|
708
765
|
const DType* els = (DType*) mat->elements;
|
709
|
-
|
766
|
+
|
710
767
|
for (i = mat->shape[0]; i-- > 0;) {
|
711
768
|
for (j = i + 1; j < mat->shape[1]; ++j) {
|
712
769
|
complex_conj = els[j*lda + 1];
|
713
770
|
complex_conj.i = -complex_conj.i;
|
714
|
-
|
771
|
+
|
715
772
|
if (els[i*lda+j] != complex_conj) {
|
716
773
|
return false;
|
717
774
|
}
|
718
775
|
}
|
719
776
|
}
|
720
|
-
|
777
|
+
|
721
778
|
return true;
|
722
779
|
}
|
723
780
|
|
@@ -725,7 +782,7 @@ template <typename DType>
|
|
725
782
|
bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
726
783
|
unsigned int i, j;
|
727
784
|
const DType* els = (DType*) mat->elements;
|
728
|
-
|
785
|
+
|
729
786
|
for (i = mat->shape[0]; i-- > 0;) {
|
730
787
|
for (j = i + 1; j < mat->shape[1]; ++j) {
|
731
788
|
if (els[i*lda+j] != els[j*lda+i]) {
|
@@ -733,137 +790,10 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
733
790
|
}
|
734
791
|
}
|
735
792
|
}
|
736
|
-
|
793
|
+
|
737
794
|
return true;
|
738
795
|
}
|
739
796
|
|
740
|
-
/*
|
741
|
-
* Templated dense storage element-wise operations which return the same DType.
|
742
|
-
*/
|
743
|
-
template <ewop_t op, typename LDType, typename RDType>
|
744
|
-
static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
|
745
|
-
unsigned int count;
|
746
|
-
size_t l_count;
|
747
|
-
size_t r_count;
|
748
|
-
|
749
|
-
size_t* temp_coords = ALLOCA_N(size_t, left->dim);
|
750
|
-
|
751
|
-
size_t* new_shape = ALLOC_N(size_t, left->dim);
|
752
|
-
memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
|
753
|
-
|
754
|
-
// Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
|
755
|
-
// set by the left matrix, but for comparisons, we'll use BYTE (in lieu of boolean).
|
756
|
-
dtype_t new_dtype = static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS ? left->dtype : BYTE;
|
757
|
-
|
758
|
-
DENSE_STORAGE* result = nm_dense_storage_create(new_dtype, new_shape, left->dim, NULL, 0);
|
759
|
-
|
760
|
-
LDType* l_elems = reinterpret_cast<LDType*>(left->elements);
|
761
|
-
|
762
|
-
if (right) { // matrix-matrix operation
|
763
|
-
RDType* r_elems = reinterpret_cast<RDType*>(right->elements);
|
764
|
-
|
765
|
-
if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
|
766
|
-
|
767
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
768
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
769
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
770
|
-
r_count = nm_dense_storage_pos(right, temp_coords);
|
771
|
-
|
772
|
-
reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
|
773
|
-
}
|
774
|
-
|
775
|
-
} else { // new_dtype is BYTE: comparison operators
|
776
|
-
uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
|
777
|
-
|
778
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
779
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
780
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
781
|
-
r_count = nm_dense_storage_pos(right, temp_coords);
|
782
|
-
|
783
|
-
switch (op) {
|
784
|
-
case EW_EQEQ:
|
785
|
-
res_elems[count] = l_elems[l_count] == r_elems[r_count];
|
786
|
-
break;
|
787
|
-
|
788
|
-
case EW_NEQ:
|
789
|
-
res_elems[count] = l_elems[l_count] != r_elems[r_count];
|
790
|
-
break;
|
791
|
-
|
792
|
-
case EW_LT:
|
793
|
-
res_elems[count] = l_elems[l_count] < r_elems[r_count];
|
794
|
-
break;
|
795
|
-
|
796
|
-
case EW_GT:
|
797
|
-
res_elems[count] = l_elems[l_count] > r_elems[r_count];
|
798
|
-
break;
|
799
|
-
|
800
|
-
case EW_LEQ:
|
801
|
-
res_elems[count] = l_elems[l_count] <= r_elems[r_count];
|
802
|
-
break;
|
803
|
-
|
804
|
-
case EW_GEQ:
|
805
|
-
res_elems[count] = l_elems[l_count] >= r_elems[r_count];
|
806
|
-
break;
|
807
|
-
|
808
|
-
default:
|
809
|
-
rb_raise(rb_eStandardError, "this should not happen");
|
810
|
-
}
|
811
|
-
}
|
812
|
-
}
|
813
|
-
|
814
|
-
} else { // matrix-scalar operation
|
815
|
-
const RDType* r_elem = reinterpret_cast<const RDType*>(rscalar);
|
816
|
-
|
817
|
-
if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
|
818
|
-
|
819
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
820
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
821
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
822
|
-
|
823
|
-
reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
|
824
|
-
}
|
825
|
-
|
826
|
-
} else {
|
827
|
-
uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
|
828
|
-
|
829
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
830
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
831
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
832
|
-
|
833
|
-
switch (op) {
|
834
|
-
case EW_EQEQ:
|
835
|
-
res_elems[count] = l_elems[l_count] == *r_elem;
|
836
|
-
break;
|
837
|
-
|
838
|
-
case EW_NEQ:
|
839
|
-
res_elems[count] = l_elems[l_count] != *r_elem;
|
840
|
-
break;
|
841
|
-
|
842
|
-
case EW_LT:
|
843
|
-
res_elems[count] = l_elems[l_count] < *r_elem;
|
844
|
-
break;
|
845
|
-
|
846
|
-
case EW_GT:
|
847
|
-
res_elems[count] = l_elems[l_count] > *r_elem;
|
848
|
-
break;
|
849
|
-
|
850
|
-
case EW_LEQ:
|
851
|
-
res_elems[count] = l_elems[l_count] <= *r_elem;
|
852
|
-
break;
|
853
|
-
|
854
|
-
case EW_GEQ:
|
855
|
-
res_elems[count] = l_elems[l_count] >= *r_elem;
|
856
|
-
break;
|
857
|
-
|
858
|
-
default:
|
859
|
-
rb_raise(rb_eStandardError, "this should not happen");
|
860
|
-
}
|
861
|
-
}
|
862
|
-
|
863
|
-
}
|
864
|
-
}
|
865
|
-
return result;
|
866
|
-
}
|
867
797
|
|
868
798
|
|
869
799
|
/*
|