nmatrix 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +102 -10
- data/README.rdoc +24 -32
- data/Rakefile +1 -1
- data/ext/nmatrix/data/complex.h +9 -0
- data/ext/nmatrix/data/data.cpp +78 -4
- data/ext/nmatrix/data/data.h +86 -54
- data/ext/nmatrix/data/rational.h +2 -0
- data/ext/nmatrix/data/ruby_object.h +38 -8
- data/ext/nmatrix/extconf.rb +13 -7
- data/ext/nmatrix/nmatrix.cpp +262 -139
- data/ext/nmatrix/nmatrix.h +11 -4
- data/ext/nmatrix/storage/common.cpp +20 -13
- data/ext/nmatrix/storage/common.h +18 -12
- data/ext/nmatrix/storage/dense.cpp +122 -192
- data/ext/nmatrix/storage/dense.h +4 -2
- data/ext/nmatrix/storage/list.cpp +467 -636
- data/ext/nmatrix/storage/list.h +6 -3
- data/ext/nmatrix/storage/storage.cpp +83 -46
- data/ext/nmatrix/storage/storage.h +7 -7
- data/ext/nmatrix/storage/yale.cpp +621 -361
- data/ext/nmatrix/storage/yale.h +21 -9
- data/ext/nmatrix/ttable_helper.rb +27 -31
- data/ext/nmatrix/types.h +1 -1
- data/ext/nmatrix/util/math.cpp +9 -10
- data/ext/nmatrix/util/sl_list.cpp +1 -7
- data/ext/nmatrix/util/sl_list.h +0 -118
- data/lib/nmatrix/blas.rb +59 -18
- data/lib/nmatrix/monkeys.rb +0 -52
- data/lib/nmatrix/nmatrix.rb +136 -9
- data/lib/nmatrix/nvector.rb +33 -0
- data/lib/nmatrix/shortcuts.rb +95 -16
- data/lib/nmatrix/version.rb +1 -1
- data/lib/nmatrix/yale_functions.rb +25 -19
- data/spec/blas_spec.rb +1 -19
- data/spec/elementwise_spec.rb +132 -17
- data/spec/lapack_spec.rb +0 -3
- data/spec/nmatrix_list_spec.rb +18 -0
- data/spec/nmatrix_spec.rb +44 -18
- data/spec/nmatrix_yale_spec.rb +1 -3
- data/spec/shortcuts_spec.rb +26 -36
- data/spec/slice_spec.rb +2 -4
- metadata +2 -2
data/ext/nmatrix/nmatrix.h
CHANGED
@@ -136,10 +136,10 @@
|
|
136
136
|
|
137
137
|
#define NM_DEF_STORAGE_ELEMENTS \
|
138
138
|
NM_DECL_ENUM(dtype_t, dtype); \
|
139
|
-
size_t
|
140
|
-
size_t*
|
141
|
-
size_t*
|
142
|
-
int
|
139
|
+
size_t dim; \
|
140
|
+
size_t* shape; \
|
141
|
+
size_t* offset; \
|
142
|
+
int count; \
|
143
143
|
STORAGE* src;
|
144
144
|
|
145
145
|
#define NM_DEF_STORAGE_CHILD_STRUCT_PRE(name) struct name : STORAGE {
|
@@ -312,6 +312,7 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
|
|
312
312
|
#define NM_SHAPE(val,i) (NM_STORAGE(val)->shape[(i)])
|
313
313
|
#define NM_SHAPE0(val) (NM_STORAGE(val)->shape[0])
|
314
314
|
#define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
|
315
|
+
#define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
|
315
316
|
|
316
317
|
#define NM_DENSE_COUNT(val) (storage_count_max_elements(NM_STORAGE_DENSE(val)))
|
317
318
|
#define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
|
@@ -355,6 +356,12 @@ extern "C" {
|
|
355
356
|
VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
|
356
357
|
|
357
358
|
NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
|
359
|
+
NM_DECL_ENUM(dtype_t, nm_dtype_min(VALUE));
|
360
|
+
|
361
|
+
// Non-API functions needed by other cpp files.
|
362
|
+
NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage);
|
363
|
+
void nm_delete(NMATRIX* mat);
|
364
|
+
void nm_delete_ref(NMATRIX* mat);
|
358
365
|
|
359
366
|
#ifdef __cplusplus
|
360
367
|
}
|
@@ -52,19 +52,26 @@
|
|
52
52
|
*/
|
53
53
|
|
54
54
|
extern "C" {
|
55
|
-
/*
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
size_t nm_storage_count_max_elements(const STORAGE* storage) {
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
55
|
+
/*
|
56
|
+
* Calculate the number of elements in the dense storage structure, based on
|
57
|
+
* shape and dim.
|
58
|
+
*/
|
59
|
+
size_t nm_storage_count_max_elements(const STORAGE* storage) {
|
60
|
+
unsigned int i;
|
61
|
+
size_t count = 1;
|
62
|
+
|
63
|
+
for (i = storage->dim; i-- > 0;) {
|
64
|
+
count *= storage->shape[i];
|
65
|
+
}
|
66
|
+
|
67
|
+
return count;
|
68
|
+
}
|
69
|
+
|
70
|
+
// Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
|
71
|
+
// the matrix's storage.
|
72
|
+
VALUE nm_enumerator_length(VALUE nmatrix) {
|
73
|
+
long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
|
74
|
+
return LONG2NUM(len);
|
65
75
|
}
|
66
|
-
|
67
|
-
return count;
|
68
|
-
}
|
69
76
|
|
70
77
|
} // end of extern "C" block
|
@@ -32,6 +32,8 @@
|
|
32
32
|
* Standard Includes
|
33
33
|
*/
|
34
34
|
|
35
|
+
#include <cmath> // pow().
|
36
|
+
|
35
37
|
/*
|
36
38
|
* Project Includes
|
37
39
|
*/
|
@@ -41,7 +43,7 @@
|
|
41
43
|
/*
|
42
44
|
* Macros
|
43
45
|
*/
|
44
|
-
|
46
|
+
|
45
47
|
extern "C" {
|
46
48
|
|
47
49
|
/*
|
@@ -69,6 +71,7 @@ struct SLICE {
|
|
69
71
|
*/
|
70
72
|
|
71
73
|
size_t nm_storage_count_max_elements(const STORAGE* storage);
|
74
|
+
VALUE nm_enumerator_length(VALUE nmatrix);
|
72
75
|
|
73
76
|
} // end of extern "C" block
|
74
77
|
|
@@ -78,19 +81,22 @@ namespace nm {
|
|
78
81
|
* Templated helper function for element-wise operations, used by dense, yale, and list.
|
79
82
|
*/
|
80
83
|
template <ewop_t op, typename LDType, typename RDType>
|
81
|
-
inline
|
84
|
+
inline VALUE ew_op_switch(LDType left, RDType right) {
|
82
85
|
switch (op) {
|
83
86
|
case EW_ADD:
|
84
|
-
return left + right;
|
87
|
+
return RubyObject(left + right).rval;
|
85
88
|
|
86
89
|
case EW_SUB:
|
87
|
-
return left - right;
|
90
|
+
return RubyObject(left - right).rval;
|
88
91
|
|
89
92
|
case EW_MUL:
|
90
|
-
return left * right;
|
93
|
+
return RubyObject(left * right).rval;
|
91
94
|
|
92
95
|
case EW_DIV:
|
93
|
-
return left / right;
|
96
|
+
return RubyObject(left / right).rval;
|
97
|
+
|
98
|
+
case EW_POW:
|
99
|
+
return RubyObject(pow(left, right)).rval;
|
94
100
|
|
95
101
|
case EW_MOD:
|
96
102
|
rb_raise(rb_eNotImpError, "Element-wise modulo is currently not supported.");
|
@@ -99,11 +105,11 @@ namespace nm {
|
|
99
105
|
default:
|
100
106
|
rb_raise(rb_eStandardError, "This should not happen.");
|
101
107
|
}
|
102
|
-
return
|
108
|
+
return Qnil;
|
103
109
|
}
|
104
110
|
|
105
111
|
#define EWOP_INT_INT_DIV(ltype, rtype) template <> \
|
106
|
-
inline
|
112
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
107
113
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
108
114
|
if ((left > 0 && right > 0) || (left < 0 && right < 0)) \
|
109
115
|
return left / right; \
|
@@ -112,27 +118,27 @@ namespace nm {
|
|
112
118
|
}
|
113
119
|
|
114
120
|
#define EWOP_UINT_UINT_DIV(ltype, rtype) template <> \
|
115
|
-
inline
|
121
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
116
122
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
117
123
|
return left / right; \
|
118
124
|
}
|
119
125
|
|
120
126
|
#define EWOP_INT_UINT_DIV(ltype, rtype) template <> \
|
121
|
-
inline
|
127
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
122
128
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
123
129
|
if (left > 0 ) return left / right; \
|
124
130
|
else return ( ltype )(std::floor((double)(left) / (double)(right))); \
|
125
131
|
}
|
126
132
|
|
127
133
|
#define EWOP_UINT_INT_DIV(ltype, rtype) template <> \
|
128
|
-
inline
|
134
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
129
135
|
if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
|
130
136
|
if (right > 0) return left / right; \
|
131
137
|
else return ( ltype )(std::floor((double)(left) / (double)(right))); \
|
132
138
|
}
|
133
139
|
|
134
140
|
#define EWOP_FLOAT_INT_DIV(ltype, rtype) template <> \
|
135
|
-
inline
|
141
|
+
inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
|
136
142
|
return left / (ltype)(right); \
|
137
143
|
}
|
138
144
|
|
@@ -60,12 +60,9 @@ namespace nm { namespace dense_storage {
|
|
60
60
|
|
61
61
|
template <typename LDType, typename RDType>
|
62
62
|
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
63
|
-
|
63
|
+
|
64
64
|
template <typename LDType, typename RDType>
|
65
65
|
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
|
66
|
-
|
67
|
-
template <ewop_t op, typename LDType, typename RDType>
|
68
|
-
static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar);
|
69
66
|
|
70
67
|
template <typename DType>
|
71
68
|
static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
|
@@ -132,7 +129,7 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
132
129
|
|
133
130
|
if (elements_length == count) {
|
134
131
|
s->elements = elements;
|
135
|
-
|
132
|
+
|
136
133
|
} else {
|
137
134
|
s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
|
138
135
|
|
@@ -145,12 +142,12 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
145
142
|
if (i + elements_length > count) {
|
146
143
|
copy_length = count - i;
|
147
144
|
}
|
148
|
-
|
145
|
+
|
149
146
|
memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
|
150
147
|
}
|
151
148
|
|
152
149
|
// Get rid of the init_val.
|
153
|
-
|
150
|
+
xfree(elements);
|
154
151
|
}
|
155
152
|
}
|
156
153
|
|
@@ -210,20 +207,85 @@ void nm_dense_storage_mark(void* storage_base) {
|
|
210
207
|
///////////////
|
211
208
|
|
212
209
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
210
|
+
|
211
|
+
/*
|
212
|
+
* map_pair iterator for dense matrices (for element-wise operations)
|
213
|
+
*/
|
214
|
+
VALUE nm_dense_map_pair(VALUE self, VALUE right) {
|
215
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
|
216
|
+
*t = NM_STORAGE_DENSE(right);
|
217
|
+
|
218
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
219
|
+
|
220
|
+
size_t* coords = ALLOCA_N(size_t, s->dim);
|
221
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
222
|
+
|
223
|
+
size_t *shape_copy = ALLOC_N(size_t, s->dim);
|
224
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
225
|
+
|
226
|
+
size_t count = nm_storage_count_max_elements(s);
|
227
|
+
|
228
|
+
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
229
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
230
|
+
|
231
|
+
for (size_t k = 0; k < count; ++k) {
|
232
|
+
nm_dense_storage_coords(result, k, coords);
|
233
|
+
size_t s_index = nm_dense_storage_pos(s, coords),
|
234
|
+
t_index = nm_dense_storage_pos(t, coords);
|
235
|
+
|
236
|
+
VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
|
237
|
+
VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
|
238
|
+
|
239
|
+
result_elem[k] = rb_yield_values(2, sval, tval);
|
240
|
+
}
|
241
|
+
|
242
|
+
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
243
|
+
|
244
|
+
return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
|
245
|
+
}
|
246
|
+
|
247
|
+
|
248
|
+
/*
|
249
|
+
* map enumerator for dense matrices.
|
250
|
+
*/
|
251
|
+
VALUE nm_dense_map(VALUE self) {
|
252
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
|
253
|
+
|
254
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
255
|
+
|
256
|
+
size_t* coords = ALLOCA_N(size_t, s->dim);
|
257
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
258
|
+
|
259
|
+
size_t *shape_copy = ALLOC_N(size_t, s->dim);
|
260
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
261
|
+
|
262
|
+
size_t count = nm_storage_count_max_elements(s);
|
263
|
+
|
264
|
+
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
265
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
266
|
+
|
267
|
+
for (size_t k = 0; k < count; ++k) {
|
268
|
+
nm_dense_storage_coords(result, k, coords);
|
269
|
+
size_t s_index = nm_dense_storage_pos(s, coords);
|
270
|
+
|
271
|
+
result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
|
272
|
+
}
|
273
|
+
|
274
|
+
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
275
|
+
|
276
|
+
return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
|
218
277
|
}
|
219
278
|
|
220
279
|
|
280
|
+
/*
|
281
|
+
* each_with_indices iterator for dense matrices.
|
282
|
+
*/
|
221
283
|
VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
222
284
|
volatile VALUE nm = nmatrix;
|
223
285
|
|
224
286
|
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
225
287
|
|
226
|
-
RETURN_SIZED_ENUMERATOR(nm, 0, 0,
|
288
|
+
RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
|
227
289
|
|
228
290
|
// Create indices and initialize them to zero
|
229
291
|
size_t* coords = ALLOCA_N(size_t, s->dim);
|
@@ -269,7 +331,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
269
331
|
volatile VALUE nm = nmatrix; // Not sure this actually does anything.
|
270
332
|
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
271
333
|
|
272
|
-
RETURN_SIZED_ENUMERATOR(nm, 0, 0,
|
334
|
+
RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length);
|
273
335
|
|
274
336
|
size_t* temp_coords = ALLOCA_N(size_t, s->dim);
|
275
337
|
size_t sliced_index;
|
@@ -280,10 +342,11 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
280
342
|
if (NM_DTYPE(nm) == nm::RUBYOBJ) {
|
281
343
|
|
282
344
|
// matrix of Ruby objects -- yield those objects directly
|
283
|
-
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
|
345
|
+
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
284
346
|
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
285
347
|
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
286
348
|
rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
|
349
|
+
}
|
287
350
|
|
288
351
|
} else {
|
289
352
|
|
@@ -300,7 +363,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
300
363
|
nm_dense_storage_delete(sliced_dummy);
|
301
364
|
|
302
365
|
return nmatrix;
|
303
|
-
|
366
|
+
|
304
367
|
}
|
305
368
|
|
306
369
|
|
@@ -321,13 +384,13 @@ void* nm_dense_storage_get(STORAGE* storage, SLICE* slice) {
|
|
321
384
|
shape[i] = slice->lengths[i];
|
322
385
|
}
|
323
386
|
|
324
|
-
ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
|
387
|
+
ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
|
325
388
|
|
326
|
-
slice_copy(ns,
|
327
|
-
reinterpret_cast<const DENSE_STORAGE*>(s->src),
|
328
|
-
slice->lengths,
|
329
|
-
0,
|
330
|
-
nm_dense_storage_pos(s, slice->coords),
|
389
|
+
slice_copy(ns,
|
390
|
+
reinterpret_cast<const DENSE_STORAGE*>(s->src),
|
391
|
+
slice->lengths,
|
392
|
+
0,
|
393
|
+
nm_dense_storage_pos(s, slice->coords),
|
331
394
|
0);
|
332
395
|
return ns;
|
333
396
|
}
|
@@ -343,7 +406,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
|
|
343
406
|
|
344
407
|
if (slice->single)
|
345
408
|
return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
|
346
|
-
|
409
|
+
|
347
410
|
else {
|
348
411
|
DENSE_STORAGE* ns = ALLOC( DENSE_STORAGE );
|
349
412
|
ns->dim = s->dim;
|
@@ -358,7 +421,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
|
|
358
421
|
|
359
422
|
ns->stride = s->stride;
|
360
423
|
ns->elements = s->elements;
|
361
|
-
|
424
|
+
|
362
425
|
s->src->count++;
|
363
426
|
ns->src = s->src;
|
364
427
|
|
@@ -387,8 +450,13 @@ void nm_dense_storage_set(STORAGE* storage, SLICE* slice, void* val) {
|
|
387
450
|
* have the same dtype.
|
388
451
|
*/
|
389
452
|
bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
390
|
-
LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
|
391
|
-
|
453
|
+
LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
|
454
|
+
|
455
|
+
if (!ttable[left->dtype][right->dtype]) {
|
456
|
+
rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
|
457
|
+
return false;
|
458
|
+
}
|
459
|
+
|
392
460
|
return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
|
393
461
|
}
|
394
462
|
|
@@ -399,10 +467,10 @@ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
|
399
467
|
bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
400
468
|
if (mat->dtype == nm::COMPLEX64) {
|
401
469
|
return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
|
402
|
-
|
470
|
+
|
403
471
|
} else if (mat->dtype == nm::COMPLEX128) {
|
404
472
|
return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
|
405
|
-
|
473
|
+
|
406
474
|
} else {
|
407
475
|
return nm_dense_storage_is_symmetric(mat, lda);
|
408
476
|
}
|
@@ -413,7 +481,7 @@ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
|
413
481
|
*/
|
414
482
|
bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
415
483
|
DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
|
416
|
-
|
484
|
+
|
417
485
|
return ttable[mat->dtype](mat, lda);
|
418
486
|
}
|
419
487
|
|
@@ -421,24 +489,6 @@ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
421
489
|
// Math //
|
422
490
|
//////////
|
423
491
|
|
424
|
-
/*
|
425
|
-
* Dense matrix-matrix and matrix-scalar element-wise operations.
|
426
|
-
*
|
427
|
-
* right or rscalar should be NULL; they should not both be initialized. If right is NULL, it'll use the scalar value instead.
|
428
|
-
*/
|
429
|
-
STORAGE* nm_dense_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar) {
|
430
|
-
OP_LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::ew_op, DENSE_STORAGE*, const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void*);
|
431
|
-
|
432
|
-
if (right)
|
433
|
-
return ttable[op][left->dtype][right->dtype](reinterpret_cast<const DENSE_STORAGE*>(left), reinterpret_cast<const DENSE_STORAGE*>(right), NULL);
|
434
|
-
else {
|
435
|
-
nm::dtype_t r_dtype = nm_dtype_guess(scalar);
|
436
|
-
void* r_scalar = ALLOCA_N(char, DTYPE_SIZES[r_dtype]);
|
437
|
-
rubyval_to_cval(scalar, r_dtype, r_scalar);
|
438
|
-
|
439
|
-
return ttable[op][left->dtype][r_dtype](reinterpret_cast<const DENSE_STORAGE*>(left), NULL, r_scalar);
|
440
|
-
}
|
441
|
-
}
|
442
492
|
|
443
493
|
/*
|
444
494
|
* Dense matrix-matrix multiplication.
|
@@ -468,9 +518,9 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
|
|
468
518
|
}
|
469
519
|
|
470
520
|
/*
|
471
|
-
* Determine the a set of slice coordinates from linear array position (in elements
|
521
|
+
* Determine the a set of slice coordinates from linear array position (in elements
|
472
522
|
* of s) of some set of coordinates (given by slice). (Inverse of
|
473
|
-
* nm_dense_storage_pos).
|
523
|
+
* nm_dense_storage_pos).
|
474
524
|
*
|
475
525
|
* The parameter coords_out should be a pre-allocated array of size equal to s->dim.
|
476
526
|
*/
|
@@ -510,7 +560,7 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
510
560
|
for (size_t i = 0; i < lengths[n]; ++i) {
|
511
561
|
slice_copy(dest, src, lengths,
|
512
562
|
pdest + dest->stride[n]*i,
|
513
|
-
psrc + src->stride[n]*i,
|
563
|
+
psrc + src->stride[n]*i,
|
514
564
|
n + 1);
|
515
565
|
}
|
516
566
|
} else {
|
@@ -528,9 +578,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
528
578
|
/*
|
529
579
|
* Copy dense storage, changing dtype if necessary.
|
530
580
|
*/
|
531
|
-
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
|
581
|
+
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
|
532
582
|
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
533
583
|
|
584
|
+
if (!ttable[new_dtype][rhs->dtype]) {
|
585
|
+
rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
|
586
|
+
return NULL;
|
587
|
+
}
|
588
|
+
|
534
589
|
return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
|
535
590
|
}
|
536
591
|
|
@@ -538,7 +593,7 @@ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
|
|
538
593
|
* Copy dense storage without a change in dtype.
|
539
594
|
*/
|
540
595
|
DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
541
|
-
size_t count = 0;
|
596
|
+
size_t count = 0;
|
542
597
|
size_t *shape = ALLOC_N(size_t, rhs->dim);
|
543
598
|
|
544
599
|
// copy shape and offset
|
@@ -593,6 +648,10 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
|
|
593
648
|
nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
|
594
649
|
} else {
|
595
650
|
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
651
|
+
|
652
|
+
if (!ttable[lhs->dtype][rhs->dtype])
|
653
|
+
rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
|
654
|
+
|
596
655
|
ttable[lhs->dtype][rhs->dtype](rhs, lhs);
|
597
656
|
}
|
598
657
|
|
@@ -640,8 +699,7 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
|
640
699
|
|
641
700
|
// Ensure that allocation worked before copying.
|
642
701
|
if (lhs && count) {
|
643
|
-
if (rhs->src != rhs) {
|
644
|
-
/* Make a copy of a ref to a matrix. */
|
702
|
+
if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
|
645
703
|
|
646
704
|
DENSE_STORAGE* tmp = nm_dense_storage_copy(rhs);
|
647
705
|
|
@@ -650,13 +708,12 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
|
650
708
|
lhs_els[count] = tmp_els[count];
|
651
709
|
}
|
652
710
|
nm_dense_storage_delete(tmp);
|
653
|
-
} else {
|
654
|
-
/* Make a regular copy. */
|
655
711
|
|
712
|
+
} else { // Make a regular copy.
|
656
713
|
while (count-- > 0) lhs_els[count] = rhs_els[count];
|
657
714
|
}
|
658
715
|
}
|
659
|
-
|
716
|
+
|
660
717
|
return lhs;
|
661
718
|
}
|
662
719
|
|
@@ -673,7 +730,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
|
673
730
|
LDType* left_elements = (LDType*)left->elements;
|
674
731
|
RDType* right_elements = (RDType*)right->elements;
|
675
732
|
|
676
|
-
// Copy elements in temp matrix if you have
|
733
|
+
// Copy elements in temp matrix if you have reference to the right.
|
677
734
|
if (left->src != left) {
|
678
735
|
tmp1 = nm_dense_storage_copy(left);
|
679
736
|
left_elements = (LDType*)tmp1->elements;
|
@@ -682,7 +739,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
|
682
739
|
tmp2 = nm_dense_storage_copy(right);
|
683
740
|
right_elements = (RDType*)tmp2->elements;
|
684
741
|
}
|
685
|
-
|
742
|
+
|
686
743
|
|
687
744
|
|
688
745
|
for (index = nm_storage_count_max_elements(left); index-- > 0;) {
|
@@ -704,20 +761,20 @@ template <typename DType>
|
|
704
761
|
bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
705
762
|
unsigned int i, j;
|
706
763
|
register DType complex_conj;
|
707
|
-
|
764
|
+
|
708
765
|
const DType* els = (DType*) mat->elements;
|
709
|
-
|
766
|
+
|
710
767
|
for (i = mat->shape[0]; i-- > 0;) {
|
711
768
|
for (j = i + 1; j < mat->shape[1]; ++j) {
|
712
769
|
complex_conj = els[j*lda + 1];
|
713
770
|
complex_conj.i = -complex_conj.i;
|
714
|
-
|
771
|
+
|
715
772
|
if (els[i*lda+j] != complex_conj) {
|
716
773
|
return false;
|
717
774
|
}
|
718
775
|
}
|
719
776
|
}
|
720
|
-
|
777
|
+
|
721
778
|
return true;
|
722
779
|
}
|
723
780
|
|
@@ -725,7 +782,7 @@ template <typename DType>
|
|
725
782
|
bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
726
783
|
unsigned int i, j;
|
727
784
|
const DType* els = (DType*) mat->elements;
|
728
|
-
|
785
|
+
|
729
786
|
for (i = mat->shape[0]; i-- > 0;) {
|
730
787
|
for (j = i + 1; j < mat->shape[1]; ++j) {
|
731
788
|
if (els[i*lda+j] != els[j*lda+i]) {
|
@@ -733,137 +790,10 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
733
790
|
}
|
734
791
|
}
|
735
792
|
}
|
736
|
-
|
793
|
+
|
737
794
|
return true;
|
738
795
|
}
|
739
796
|
|
740
|
-
/*
|
741
|
-
* Templated dense storage element-wise operations which return the same DType.
|
742
|
-
*/
|
743
|
-
template <ewop_t op, typename LDType, typename RDType>
|
744
|
-
static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
|
745
|
-
unsigned int count;
|
746
|
-
size_t l_count;
|
747
|
-
size_t r_count;
|
748
|
-
|
749
|
-
size_t* temp_coords = ALLOCA_N(size_t, left->dim);
|
750
|
-
|
751
|
-
size_t* new_shape = ALLOC_N(size_t, left->dim);
|
752
|
-
memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
|
753
|
-
|
754
|
-
// Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
|
755
|
-
// set by the left matrix, but for comparisons, we'll use BYTE (in lieu of boolean).
|
756
|
-
dtype_t new_dtype = static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS ? left->dtype : BYTE;
|
757
|
-
|
758
|
-
DENSE_STORAGE* result = nm_dense_storage_create(new_dtype, new_shape, left->dim, NULL, 0);
|
759
|
-
|
760
|
-
LDType* l_elems = reinterpret_cast<LDType*>(left->elements);
|
761
|
-
|
762
|
-
if (right) { // matrix-matrix operation
|
763
|
-
RDType* r_elems = reinterpret_cast<RDType*>(right->elements);
|
764
|
-
|
765
|
-
if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
|
766
|
-
|
767
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
768
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
769
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
770
|
-
r_count = nm_dense_storage_pos(right, temp_coords);
|
771
|
-
|
772
|
-
reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
|
773
|
-
}
|
774
|
-
|
775
|
-
} else { // new_dtype is BYTE: comparison operators
|
776
|
-
uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
|
777
|
-
|
778
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
779
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
780
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
781
|
-
r_count = nm_dense_storage_pos(right, temp_coords);
|
782
|
-
|
783
|
-
switch (op) {
|
784
|
-
case EW_EQEQ:
|
785
|
-
res_elems[count] = l_elems[l_count] == r_elems[r_count];
|
786
|
-
break;
|
787
|
-
|
788
|
-
case EW_NEQ:
|
789
|
-
res_elems[count] = l_elems[l_count] != r_elems[r_count];
|
790
|
-
break;
|
791
|
-
|
792
|
-
case EW_LT:
|
793
|
-
res_elems[count] = l_elems[l_count] < r_elems[r_count];
|
794
|
-
break;
|
795
|
-
|
796
|
-
case EW_GT:
|
797
|
-
res_elems[count] = l_elems[l_count] > r_elems[r_count];
|
798
|
-
break;
|
799
|
-
|
800
|
-
case EW_LEQ:
|
801
|
-
res_elems[count] = l_elems[l_count] <= r_elems[r_count];
|
802
|
-
break;
|
803
|
-
|
804
|
-
case EW_GEQ:
|
805
|
-
res_elems[count] = l_elems[l_count] >= r_elems[r_count];
|
806
|
-
break;
|
807
|
-
|
808
|
-
default:
|
809
|
-
rb_raise(rb_eStandardError, "this should not happen");
|
810
|
-
}
|
811
|
-
}
|
812
|
-
}
|
813
|
-
|
814
|
-
} else { // matrix-scalar operation
|
815
|
-
const RDType* r_elem = reinterpret_cast<const RDType*>(rscalar);
|
816
|
-
|
817
|
-
if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
|
818
|
-
|
819
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
820
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
821
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
822
|
-
|
823
|
-
reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
|
824
|
-
}
|
825
|
-
|
826
|
-
} else {
|
827
|
-
uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
|
828
|
-
|
829
|
-
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
830
|
-
nm_dense_storage_coords(result, count, temp_coords);
|
831
|
-
l_count = nm_dense_storage_pos(left, temp_coords);
|
832
|
-
|
833
|
-
switch (op) {
|
834
|
-
case EW_EQEQ:
|
835
|
-
res_elems[count] = l_elems[l_count] == *r_elem;
|
836
|
-
break;
|
837
|
-
|
838
|
-
case EW_NEQ:
|
839
|
-
res_elems[count] = l_elems[l_count] != *r_elem;
|
840
|
-
break;
|
841
|
-
|
842
|
-
case EW_LT:
|
843
|
-
res_elems[count] = l_elems[l_count] < *r_elem;
|
844
|
-
break;
|
845
|
-
|
846
|
-
case EW_GT:
|
847
|
-
res_elems[count] = l_elems[l_count] > *r_elem;
|
848
|
-
break;
|
849
|
-
|
850
|
-
case EW_LEQ:
|
851
|
-
res_elems[count] = l_elems[l_count] <= *r_elem;
|
852
|
-
break;
|
853
|
-
|
854
|
-
case EW_GEQ:
|
855
|
-
res_elems[count] = l_elems[l_count] >= *r_elem;
|
856
|
-
break;
|
857
|
-
|
858
|
-
default:
|
859
|
-
rb_raise(rb_eStandardError, "this should not happen");
|
860
|
-
}
|
861
|
-
}
|
862
|
-
|
863
|
-
}
|
864
|
-
}
|
865
|
-
return result;
|
866
|
-
}
|
867
797
|
|
868
798
|
|
869
799
|
/*
|