nmatrix 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/CONTRIBUTING.md +66 -0
- data/Gemfile +1 -1
- data/History.txt +68 -10
- data/LICENSE.txt +2 -2
- data/Manifest.txt +2 -0
- data/README.rdoc +90 -69
- data/Rakefile +18 -9
- data/ext/nmatrix/data/complex.h +7 -7
- data/ext/nmatrix/data/data.cpp +2 -7
- data/ext/nmatrix/data/data.h +7 -4
- data/ext/nmatrix/data/rational.h +2 -2
- data/ext/nmatrix/data/ruby_object.h +3 -10
- data/ext/nmatrix/extconf.rb +79 -54
- data/ext/nmatrix/new_extconf.rb +11 -12
- data/ext/nmatrix/nmatrix.cpp +94 -125
- data/ext/nmatrix/nmatrix.h +38 -17
- data/ext/nmatrix/ruby_constants.cpp +2 -15
- data/ext/nmatrix/ruby_constants.h +2 -14
- data/ext/nmatrix/storage/common.cpp +2 -2
- data/ext/nmatrix/storage/common.h +2 -2
- data/ext/nmatrix/storage/dense.cpp +206 -31
- data/ext/nmatrix/storage/dense.h +5 -2
- data/ext/nmatrix/storage/list.cpp +52 -4
- data/ext/nmatrix/storage/list.h +3 -2
- data/ext/nmatrix/storage/storage.cpp +6 -6
- data/ext/nmatrix/storage/storage.h +2 -2
- data/ext/nmatrix/storage/yale.cpp +202 -49
- data/ext/nmatrix/storage/yale.h +5 -4
- data/ext/nmatrix/ttable_helper.rb +108 -108
- data/ext/nmatrix/types.h +2 -15
- data/ext/nmatrix/util/io.cpp +2 -2
- data/ext/nmatrix/util/io.h +2 -2
- data/ext/nmatrix/util/lapack.h +2 -2
- data/ext/nmatrix/util/math.cpp +14 -14
- data/ext/nmatrix/util/math.h +2 -2
- data/ext/nmatrix/util/sl_list.cpp +2 -2
- data/ext/nmatrix/util/sl_list.h +2 -2
- data/ext/nmatrix/util/util.h +2 -2
- data/lib/nmatrix.rb +13 -35
- data/lib/nmatrix/blas.rb +182 -56
- data/lib/nmatrix/io/market.rb +38 -14
- data/lib/nmatrix/io/mat5_reader.rb +393 -278
- data/lib/nmatrix/io/mat_reader.rb +121 -107
- data/lib/nmatrix/lapack.rb +59 -14
- data/lib/nmatrix/monkeys.rb +32 -30
- data/lib/nmatrix/nmatrix.rb +204 -100
- data/lib/nmatrix/nvector.rb +166 -57
- data/lib/nmatrix/shortcuts.rb +364 -231
- data/lib/nmatrix/version.rb +8 -4
- data/nmatrix.gemspec +5 -3
- data/scripts/mac-brew-gcc.sh +1 -1
- data/spec/blas_spec.rb +80 -2
- data/spec/math_spec.rb +78 -32
- data/spec/nmatrix_list_spec.rb +55 -55
- data/spec/nmatrix_spec.rb +60 -117
- data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
- data/spec/nmatrix_yale_spec.rb +214 -198
- data/spec/nvector_spec.rb +58 -2
- data/spec/shortcuts_spec.rb +156 -32
- data/spec/slice_spec.rb +229 -178
- data/spec/spec_helper.rb +2 -2
- metadata +71 -21
data/ext/nmatrix/nmatrix.h
CHANGED
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -42,8 +42,6 @@
|
|
42
42
|
#include <string.h>
|
43
43
|
#endif
|
44
44
|
|
45
|
-
|
46
|
-
|
47
45
|
#ifdef BENCHMARK
|
48
46
|
// SOURCE: http://stackoverflow.com/questions/2349776/how-can-i-benchmark-a-c-program-easily
|
49
47
|
#ifdef __cplusplus
|
@@ -55,16 +53,10 @@
|
|
55
53
|
#endif
|
56
54
|
#endif
|
57
55
|
|
58
|
-
/*
|
59
|
-
* Project Includes
|
60
|
-
*/
|
61
|
-
|
62
|
-
|
63
56
|
/*
|
64
57
|
* Macros
|
65
58
|
*/
|
66
59
|
|
67
|
-
|
68
60
|
#define RUBY_ZERO INT2FIX(0)
|
69
61
|
|
70
62
|
#ifndef SIZEOF_INT
|
@@ -112,6 +104,33 @@
|
|
112
104
|
|
113
105
|
#ifdef __cplusplus /* These are the C++ versions of the macros. */
|
114
106
|
|
107
|
+
/*
|
108
|
+
* If no block is given, return an enumerator. This copied straight out of ruby's include/ruby/intern.h.
|
109
|
+
*
|
110
|
+
* rb_enumeratorize is located in enumerator.c.
|
111
|
+
*
|
112
|
+
* VALUE rb_enumeratorize(VALUE obj, VALUE meth, int argc, VALUE *argv) {
|
113
|
+
* return enumerator_init(enumerator_allocate(rb_cEnumerator), obj, meth, argc, argv);
|
114
|
+
* }
|
115
|
+
*/
|
116
|
+
#ifdef RUBY_2
|
117
|
+
#ifndef RETURN_SIZED_ENUMERATOR
|
118
|
+
#undef RETURN_SIZED_ENUMERATOR
|
119
|
+
// Ruby 2.0 and higher has rb_enumeratorize_with_size instead of rb_enumeratorize.
|
120
|
+
// We want to support both in the simplest way possible.
|
121
|
+
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \
|
122
|
+
if (!rb_block_given_p()) \
|
123
|
+
return rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), (argc), (argv), (size_fn)); \
|
124
|
+
} while (0)
|
125
|
+
#endif
|
126
|
+
#else
|
127
|
+
#undef RETURN_SIZED_ENUMERATOR
|
128
|
+
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \
|
129
|
+
if (!rb_block_given_p()) \
|
130
|
+
return rb_enumeratorize((obj), ID2SYM(rb_frame_this_func()), (argc), (argv)); \
|
131
|
+
} while (0)
|
132
|
+
#endif
|
133
|
+
|
115
134
|
#define NM_DECL_ENUM(enum_type, name) nm::enum_type name
|
116
135
|
#define NM_DECL_STRUCT(type, name) type name;
|
117
136
|
|
@@ -144,7 +163,7 @@
|
|
144
163
|
#else /* These are the C versions of the macros. */
|
145
164
|
|
146
165
|
#define NM_DECL_ENUM(enum_type, name) nm_ ## enum_type name
|
147
|
-
#define NM_DECL_STRUCT(type, name) NM_ ## type name;
|
166
|
+
#define NM_DECL_STRUCT(type, name) struct NM_ ## type name;
|
148
167
|
|
149
168
|
#define NM_DEF_STORAGE_ELEMENTS \
|
150
169
|
NM_DECL_ENUM(dtype_t, dtype); \
|
@@ -313,28 +332,30 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
|
|
313
332
|
(rb_obj_is_kind_of(obj, cNVector) == Qtrue)
|
314
333
|
|
315
334
|
|
335
|
+
#ifdef __cplusplus
|
316
336
|
typedef VALUE (*METHOD)(...);
|
317
337
|
|
318
|
-
#ifdef __cplusplus
|
319
338
|
//}; // end of namespace nm
|
320
339
|
#endif
|
321
340
|
|
322
|
-
/*
|
323
|
-
* Data
|
324
|
-
*/
|
325
|
-
|
326
341
|
/*
|
327
342
|
* Functions
|
328
343
|
*/
|
329
344
|
|
345
|
+
#ifdef __cplusplus
|
330
346
|
extern "C" {
|
347
|
+
#endif
|
348
|
+
|
331
349
|
void Init_nmatrix();
|
332
|
-
|
350
|
+
|
333
351
|
// External API
|
334
352
|
VALUE rb_nmatrix_dense_create(NM_DECL_ENUM(dtype_t, dtype), size_t* shape, size_t dim, void* elements, size_t length);
|
335
353
|
VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
|
336
354
|
|
337
355
|
NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
|
356
|
+
|
357
|
+
#ifdef __cplusplus
|
338
358
|
}
|
359
|
+
#endif
|
339
360
|
|
340
361
|
#endif // NMATRIX_H
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -31,14 +31,6 @@
|
|
31
31
|
|
32
32
|
#include <ruby.h>
|
33
33
|
|
34
|
-
/*
|
35
|
-
* Project Includes
|
36
|
-
*/
|
37
|
-
|
38
|
-
/*
|
39
|
-
* Macros
|
40
|
-
*/
|
41
|
-
|
42
34
|
/*
|
43
35
|
* Global Variables
|
44
36
|
*/
|
@@ -89,10 +81,6 @@ VALUE cNMatrix,
|
|
89
81
|
nm_eDataTypeError,
|
90
82
|
nm_eStorageTypeError;
|
91
83
|
|
92
|
-
/*
|
93
|
-
* Forward Declarations
|
94
|
-
*/
|
95
|
-
|
96
84
|
/*
|
97
85
|
* Functions
|
98
86
|
*/
|
@@ -135,4 +123,3 @@ void nm_init_ruby_constants(void) {
|
|
135
123
|
nm_rb_unit = rb_intern("unit");
|
136
124
|
nm_rb_nonunit = rb_intern("nonunit");
|
137
125
|
}
|
138
|
-
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -34,18 +34,6 @@
|
|
34
34
|
|
35
35
|
#include <ruby.h>
|
36
36
|
|
37
|
-
/*
|
38
|
-
* Project Includes
|
39
|
-
*/
|
40
|
-
|
41
|
-
/*
|
42
|
-
* Macros
|
43
|
-
*/
|
44
|
-
|
45
|
-
/*
|
46
|
-
* Types
|
47
|
-
*/
|
48
|
-
|
49
37
|
/*
|
50
38
|
* Data
|
51
39
|
*/
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -55,6 +55,9 @@
|
|
55
55
|
|
56
56
|
namespace nm { namespace dense_storage {
|
57
57
|
|
58
|
+
template<typename LDType, typename RDType>
|
59
|
+
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
60
|
+
|
58
61
|
template <typename LDType, typename RDType>
|
59
62
|
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
60
63
|
|
@@ -89,13 +92,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
89
92
|
// Lifecycle //
|
90
93
|
///////////////
|
91
94
|
|
95
|
+
|
92
96
|
/*
|
93
|
-
*
|
94
|
-
*
|
95
|
-
* will be
|
96
|
-
*
|
97
|
+
* This creates a dummy with all the properties of dense storage, but no actual elements allocation.
|
98
|
+
*
|
99
|
+
* elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
|
100
|
+
* check for that NULL pointer before freeing elements.
|
97
101
|
*/
|
98
|
-
DENSE_STORAGE*
|
102
|
+
static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
|
99
103
|
DENSE_STORAGE* s = ALLOC( DENSE_STORAGE );
|
100
104
|
|
101
105
|
s->dim = dim;
|
@@ -108,12 +112,27 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
108
112
|
s->stride = stride(shape, dim);
|
109
113
|
s->count = 1;
|
110
114
|
s->src = s;
|
111
|
-
|
112
|
-
|
115
|
+
|
116
|
+
s->elements = NULL;
|
117
|
+
|
118
|
+
return s;
|
119
|
+
}
|
120
|
+
|
121
|
+
|
122
|
+
/*
|
123
|
+
* Note that elements and elements_length are for initial value(s) passed in.
|
124
|
+
* If they are the correct length, they will be used directly. If not, they
|
125
|
+
* will be concatenated over and over again into a new elements array. If
|
126
|
+
* elements is NULL, the new elements array will not be initialized.
|
127
|
+
*/
|
128
|
+
DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
|
129
|
+
|
130
|
+
DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
|
131
|
+
size_t count = nm_storage_count_max_elements(s);
|
113
132
|
|
114
133
|
if (elements_length == count) {
|
115
134
|
s->elements = elements;
|
116
|
-
|
135
|
+
|
117
136
|
} else {
|
118
137
|
s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
|
119
138
|
|
@@ -138,8 +157,9 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
138
157
|
return s;
|
139
158
|
}
|
140
159
|
|
160
|
+
|
141
161
|
/*
|
142
|
-
* Destructor for dense storage
|
162
|
+
* Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
|
143
163
|
*/
|
144
164
|
void nm_dense_storage_delete(STORAGE* s) {
|
145
165
|
// Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
|
@@ -149,7 +169,8 @@ void nm_dense_storage_delete(STORAGE* s) {
|
|
149
169
|
free(storage->shape);
|
150
170
|
free(storage->offset);
|
151
171
|
free(storage->stride);
|
152
|
-
|
172
|
+
if (storage->elements != NULL) // happens with dummy objects
|
173
|
+
free(storage->elements);
|
153
174
|
free(storage);
|
154
175
|
}
|
155
176
|
}
|
@@ -188,6 +209,101 @@ void nm_dense_storage_mark(void* storage_base) {
|
|
188
209
|
// Accessors //
|
189
210
|
///////////////
|
190
211
|
|
212
|
+
|
213
|
+
// Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
|
214
|
+
// the matrix's storage.
|
215
|
+
static VALUE nm_dense_enumerator_length(VALUE nmatrix) {
|
216
|
+
long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
|
217
|
+
return LONG2NUM(len);
|
218
|
+
}
|
219
|
+
|
220
|
+
|
221
|
+
VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
222
|
+
volatile VALUE nm = nmatrix;
|
223
|
+
|
224
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
225
|
+
|
226
|
+
RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_dense_enumerator_length); // fourth argument only used by Ruby2+
|
227
|
+
|
228
|
+
// Create indices and initialize them to zero
|
229
|
+
size_t* coords = ALLOCA_N(size_t, s->dim);
|
230
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
231
|
+
|
232
|
+
size_t slice_index;
|
233
|
+
size_t* shape_copy = ALLOC_N(size_t, s->dim);
|
234
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
235
|
+
|
236
|
+
DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
|
237
|
+
|
238
|
+
for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
|
239
|
+
nm_dense_storage_coords(sliced_dummy, k, coords);
|
240
|
+
slice_index = nm_dense_storage_pos(s, coords);
|
241
|
+
VALUE ary = rb_ary_new();
|
242
|
+
if (NM_DTYPE(nm) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
|
243
|
+
else rb_ary_push(ary, rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval);
|
244
|
+
|
245
|
+
for (size_t p = 0; p < s->dim; ++p) {
|
246
|
+
rb_ary_push(ary, INT2FIX(coords[p]));
|
247
|
+
}
|
248
|
+
|
249
|
+
// yield the array which now consists of the value and the indices
|
250
|
+
rb_yield(ary);
|
251
|
+
|
252
|
+
}
|
253
|
+
|
254
|
+
nm_dense_storage_delete(sliced_dummy);
|
255
|
+
|
256
|
+
return Qnil;
|
257
|
+
|
258
|
+
}
|
259
|
+
|
260
|
+
|
261
|
+
/*
|
262
|
+
* Borrowed this function from NArray. Handles 'each' iteration on a dense
|
263
|
+
* matrix.
|
264
|
+
*
|
265
|
+
* Additionally, handles separately matrices containing VALUEs and matrices
|
266
|
+
* containing other types of data.
|
267
|
+
*/
|
268
|
+
VALUE nm_dense_each(VALUE nmatrix) {
|
269
|
+
volatile VALUE nm = nmatrix; // Not sure this actually does anything.
|
270
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
271
|
+
|
272
|
+
RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_storage_count_max_elements(s));
|
273
|
+
|
274
|
+
size_t* temp_coords = ALLOCA_N(size_t, s->dim);
|
275
|
+
size_t sliced_index;
|
276
|
+
size_t* shape_copy = ALLOC_N(size_t, s->dim);
|
277
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
278
|
+
DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
|
279
|
+
|
280
|
+
if (NM_DTYPE(nm) == nm::RUBYOBJ) {
|
281
|
+
|
282
|
+
// matrix of Ruby objects -- yield those objects directly
|
283
|
+
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
|
284
|
+
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
285
|
+
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
286
|
+
rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
|
287
|
+
|
288
|
+
} else {
|
289
|
+
|
290
|
+
// We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way user can't accidentally
|
291
|
+
// modify it and cause a seg fault.
|
292
|
+
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
293
|
+
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
294
|
+
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
295
|
+
VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval;
|
296
|
+
rb_yield( v ); // yield to the copy we made
|
297
|
+
}
|
298
|
+
}
|
299
|
+
|
300
|
+
nm_dense_storage_delete(sliced_dummy);
|
301
|
+
|
302
|
+
return Qnil;
|
303
|
+
|
304
|
+
}
|
305
|
+
|
306
|
+
|
191
307
|
/*
|
192
308
|
* Get a slice or one element, using copying.
|
193
309
|
*
|
@@ -348,6 +464,25 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
|
|
348
464
|
pos += (coords[i] + s->offset[i]) * s->stride[i];
|
349
465
|
|
350
466
|
return pos;
|
467
|
+
|
468
|
+
}
|
469
|
+
|
470
|
+
/*
|
471
|
+
* Determine the a set of slice coordinates from linear array position (in elements
|
472
|
+
* of s) of some set of coordinates (given by slice). (Inverse of
|
473
|
+
* nm_dense_storage_pos).
|
474
|
+
*
|
475
|
+
* The parameter coords_out should be a pre-allocated array of size equal to s->dim.
|
476
|
+
*/
|
477
|
+
void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {
|
478
|
+
|
479
|
+
size_t temp_pos = slice_pos;
|
480
|
+
|
481
|
+
for (size_t i = 0; i < s->dim; ++i) {
|
482
|
+
coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
|
483
|
+
temp_pos = temp_pos % s->stride[i];
|
484
|
+
}
|
485
|
+
|
351
486
|
}
|
352
487
|
|
353
488
|
/*
|
@@ -454,7 +589,12 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
|
|
454
589
|
lhs->offset[0] = rhs->offset[1];
|
455
590
|
lhs->offset[1] = rhs->offset[0];
|
456
591
|
|
457
|
-
|
592
|
+
if (rhs_base->src == rhs_base) {
|
593
|
+
nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
|
594
|
+
} else {
|
595
|
+
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
596
|
+
ttable[lhs->dtype][rhs->dtype](rhs, lhs);
|
597
|
+
}
|
458
598
|
|
459
599
|
return (STORAGE*)lhs;
|
460
600
|
}
|
@@ -467,6 +607,25 @@ namespace nm { namespace dense_storage {
|
|
467
607
|
// Templated Functions //
|
468
608
|
/////////////////////////
|
469
609
|
|
610
|
+
template<typename LDType, typename RDType>
|
611
|
+
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
|
612
|
+
|
613
|
+
LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
|
614
|
+
RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
|
615
|
+
|
616
|
+
size_t count = nm_storage_count_max_elements(lhs);
|
617
|
+
size_t* temp_coords = ALLOCA_N(size_t, lhs->dim);
|
618
|
+
size_t coord_swap_temp;
|
619
|
+
|
620
|
+
while (count-- > 0) {
|
621
|
+
nm_dense_storage_coords(lhs, count, temp_coords);
|
622
|
+
NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
|
623
|
+
size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
|
624
|
+
lhs_els[count] = rhs_els[r_coord];
|
625
|
+
}
|
626
|
+
|
627
|
+
}
|
628
|
+
|
470
629
|
template <typename LDType, typename RDType>
|
471
630
|
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
472
631
|
size_t count = nm_storage_count_max_elements(rhs);
|
@@ -584,8 +743,12 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
584
743
|
template <ewop_t op, typename LDType, typename RDType>
|
585
744
|
static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
|
586
745
|
unsigned int count;
|
587
|
-
|
588
|
-
|
746
|
+
size_t l_count;
|
747
|
+
size_t r_count;
|
748
|
+
|
749
|
+
size_t* temp_coords = ALLOCA_N(size_t, left->dim);
|
750
|
+
|
751
|
+
size_t* new_shape = ALLOC_N(size_t, left->dim);
|
589
752
|
memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
|
590
753
|
|
591
754
|
// Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
|
@@ -602,36 +765,44 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
|
|
602
765
|
if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
|
603
766
|
|
604
767
|
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
605
|
-
|
768
|
+
nm_dense_storage_coords(result, count, temp_coords);
|
769
|
+
l_count = nm_dense_storage_pos(left, temp_coords);
|
770
|
+
r_count = nm_dense_storage_pos(right, temp_coords);
|
771
|
+
|
772
|
+
reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
|
606
773
|
}
|
607
774
|
|
608
775
|
} else { // new_dtype is BYTE: comparison operators
|
609
776
|
uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
|
610
777
|
|
611
778
|
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
779
|
+
nm_dense_storage_coords(result, count, temp_coords);
|
780
|
+
l_count = nm_dense_storage_pos(left, temp_coords);
|
781
|
+
r_count = nm_dense_storage_pos(right, temp_coords);
|
782
|
+
|
612
783
|
switch (op) {
|
613
784
|
case EW_EQEQ:
|
614
|
-
res_elems[count] = l_elems[
|
785
|
+
res_elems[count] = l_elems[l_count] == r_elems[r_count];
|
615
786
|
break;
|
616
787
|
|
617
788
|
case EW_NEQ:
|
618
|
-
res_elems[count] = l_elems[
|
789
|
+
res_elems[count] = l_elems[l_count] != r_elems[r_count];
|
619
790
|
break;
|
620
791
|
|
621
792
|
case EW_LT:
|
622
|
-
res_elems[count] = l_elems[
|
793
|
+
res_elems[count] = l_elems[l_count] < r_elems[r_count];
|
623
794
|
break;
|
624
795
|
|
625
796
|
case EW_GT:
|
626
|
-
res_elems[count] = l_elems[
|
797
|
+
res_elems[count] = l_elems[l_count] > r_elems[r_count];
|
627
798
|
break;
|
628
799
|
|
629
800
|
case EW_LEQ:
|
630
|
-
res_elems[count] = l_elems[
|
801
|
+
res_elems[count] = l_elems[l_count] <= r_elems[r_count];
|
631
802
|
break;
|
632
803
|
|
633
804
|
case EW_GEQ:
|
634
|
-
res_elems[count] = l_elems[
|
805
|
+
res_elems[count] = l_elems[l_count] >= r_elems[r_count];
|
635
806
|
break;
|
636
807
|
|
637
808
|
default:
|
@@ -646,36 +817,42 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
|
|
646
817
|
if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
|
647
818
|
|
648
819
|
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
649
|
-
|
820
|
+
nm_dense_storage_coords(result, count, temp_coords);
|
821
|
+
l_count = nm_dense_storage_pos(left, temp_coords);
|
822
|
+
|
823
|
+
reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
|
650
824
|
}
|
651
825
|
|
652
826
|
} else {
|
653
827
|
uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
|
654
828
|
|
655
829
|
for (count = nm_storage_count_max_elements(result); count-- > 0;) {
|
830
|
+
nm_dense_storage_coords(result, count, temp_coords);
|
831
|
+
l_count = nm_dense_storage_pos(left, temp_coords);
|
832
|
+
|
656
833
|
switch (op) {
|
657
834
|
case EW_EQEQ:
|
658
|
-
res_elems[count] = l_elems[
|
835
|
+
res_elems[count] = l_elems[l_count] == *r_elem;
|
659
836
|
break;
|
660
837
|
|
661
838
|
case EW_NEQ:
|
662
|
-
res_elems[count] = l_elems[
|
839
|
+
res_elems[count] = l_elems[l_count] != *r_elem;
|
663
840
|
break;
|
664
841
|
|
665
842
|
case EW_LT:
|
666
|
-
res_elems[count] = l_elems[
|
843
|
+
res_elems[count] = l_elems[l_count] < *r_elem;
|
667
844
|
break;
|
668
845
|
|
669
846
|
case EW_GT:
|
670
|
-
res_elems[count] = l_elems[
|
847
|
+
res_elems[count] = l_elems[l_count] > *r_elem;
|
671
848
|
break;
|
672
849
|
|
673
850
|
case EW_LEQ:
|
674
|
-
res_elems[count] = l_elems[
|
851
|
+
res_elems[count] = l_elems[l_count] <= *r_elem;
|
675
852
|
break;
|
676
853
|
|
677
854
|
case EW_GEQ:
|
678
|
-
res_elems[count] = l_elems[
|
855
|
+
res_elems[count] = l_elems[l_count] >= *r_elem;
|
679
856
|
break;
|
680
857
|
|
681
858
|
default:
|
@@ -685,7 +862,6 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
|
|
685
862
|
|
686
863
|
}
|
687
864
|
}
|
688
|
-
|
689
865
|
return result;
|
690
866
|
}
|
691
867
|
|
@@ -707,7 +883,6 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
|
|
707
883
|
*pAlpha = 1;
|
708
884
|
*pBeta = 0;
|
709
885
|
// Do the multiplication
|
710
|
-
|
711
886
|
if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
|
712
887
|
reinterpret_cast<DType*>(left->elements), left->shape[1],
|
713
888
|
reinterpret_cast<DType*>(right->elements), 1, pBeta,
|