nmatrix 0.0.9 → 0.1.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/History.txt +95 -1
- data/LICENSE.txt +2 -2
- data/README.rdoc +24 -26
- data/Rakefile +32 -16
- data/ext/nmatrix/data/complex.h +2 -2
- data/ext/nmatrix/data/data.cpp +27 -51
- data/ext/nmatrix/data/data.h +92 -4
- data/ext/nmatrix/data/meta.h +2 -2
- data/ext/nmatrix/data/rational.h +2 -2
- data/ext/nmatrix/data/ruby_object.h +2 -2
- data/ext/nmatrix/extconf.rb +87 -86
- data/ext/nmatrix/math.cpp +45 -40
- data/ext/nmatrix/math/asum.h +3 -3
- data/ext/nmatrix/math/geev.h +2 -2
- data/ext/nmatrix/math/gemm.h +6 -2
- data/ext/nmatrix/math/gemv.h +6 -2
- data/ext/nmatrix/math/ger.h +2 -2
- data/ext/nmatrix/math/gesdd.h +2 -2
- data/ext/nmatrix/math/gesvd.h +2 -2
- data/ext/nmatrix/math/getf2.h +2 -2
- data/ext/nmatrix/math/getrf.h +2 -2
- data/ext/nmatrix/math/getri.h +2 -2
- data/ext/nmatrix/math/getrs.h +7 -3
- data/ext/nmatrix/math/idamax.h +2 -2
- data/ext/nmatrix/math/inc.h +12 -6
- data/ext/nmatrix/math/laswp.h +2 -2
- data/ext/nmatrix/math/long_dtype.h +2 -2
- data/ext/nmatrix/math/math.h +16 -10
- data/ext/nmatrix/math/nrm2.h +3 -3
- data/ext/nmatrix/math/potrs.h +7 -3
- data/ext/nmatrix/math/rot.h +2 -2
- data/ext/nmatrix/math/rotg.h +2 -2
- data/ext/nmatrix/math/scal.h +2 -2
- data/ext/nmatrix/math/swap.h +2 -2
- data/ext/nmatrix/math/trsm.h +7 -3
- data/ext/nmatrix/nm_memory.h +60 -0
- data/ext/nmatrix/nmatrix.cpp +13 -47
- data/ext/nmatrix/nmatrix.h +37 -12
- data/ext/nmatrix/ruby_constants.cpp +4 -2
- data/ext/nmatrix/ruby_constants.h +4 -2
- data/ext/nmatrix/ruby_nmatrix.c +937 -170
- data/ext/nmatrix/storage/common.cpp +2 -2
- data/ext/nmatrix/storage/common.h +2 -2
- data/ext/nmatrix/storage/{dense.cpp → dense/dense.cpp} +253 -100
- data/ext/nmatrix/storage/{dense.h → dense/dense.h} +6 -5
- data/ext/nmatrix/storage/{list.cpp → list/list.cpp} +517 -98
- data/ext/nmatrix/storage/{list.h → list/list.h} +13 -6
- data/ext/nmatrix/storage/storage.cpp +48 -19
- data/ext/nmatrix/storage/storage.h +4 -4
- data/ext/nmatrix/storage/yale/class.h +112 -43
- data/ext/nmatrix/storage/yale/iterators/base.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/iterator.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/row.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +4 -3
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +2 -2
- data/ext/nmatrix/storage/yale/math/transpose.h +2 -2
- data/ext/nmatrix/storage/yale/yale.cpp +343 -52
- data/ext/nmatrix/storage/yale/yale.h +7 -3
- data/ext/nmatrix/types.h +2 -2
- data/ext/nmatrix/util/io.cpp +5 -5
- data/ext/nmatrix/util/io.h +2 -2
- data/ext/nmatrix/util/sl_list.cpp +40 -27
- data/ext/nmatrix/util/sl_list.h +3 -3
- data/ext/nmatrix/util/util.h +2 -2
- data/lib/nmatrix.rb +2 -2
- data/lib/nmatrix/blas.rb +2 -2
- data/lib/nmatrix/enumerate.rb +17 -6
- data/lib/nmatrix/io/market.rb +2 -3
- data/lib/nmatrix/io/mat5_reader.rb +2 -2
- data/lib/nmatrix/io/mat_reader.rb +2 -2
- data/lib/nmatrix/lapack.rb +46 -46
- data/lib/nmatrix/math.rb +213 -20
- data/lib/nmatrix/monkeys.rb +24 -2
- data/lib/nmatrix/nmatrix.rb +394 -9
- data/lib/nmatrix/nvector.rb +2 -64
- data/lib/nmatrix/rspec.rb +2 -2
- data/lib/nmatrix/shortcuts.rb +14 -61
- data/lib/nmatrix/version.rb +11 -3
- data/lib/nmatrix/yale_functions.rb +4 -4
- data/nmatrix.gemspec +2 -7
- data/scripts/mac-brew-gcc.sh +11 -8
- data/scripts/mac-mavericks-brew-gcc.sh +22 -0
- data/spec/00_nmatrix_spec.rb +116 -7
- data/spec/01_enum_spec.rb +17 -3
- data/spec/02_slice_spec.rb +11 -3
- data/spec/blas_spec.rb +5 -2
- data/spec/elementwise_spec.rb +5 -2
- data/spec/io_spec.rb +27 -17
- data/spec/lapack_spec.rb +157 -9
- data/spec/math_spec.rb +95 -4
- data/spec/nmatrix_yale_spec.rb +21 -26
- data/spec/rspec_monkeys.rb +27 -0
- data/spec/rspec_spec.rb +2 -2
- data/spec/shortcuts_spec.rb +5 -10
- data/spec/slice_set_spec.rb +6 -2
- data/spec/spec_helper.rb +3 -2
- data/spec/stat_spec.rb +174 -158
- metadata +15 -15
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -9,8 +9,8 @@
|
|
9
9
|
//
|
10
10
|
// == Copyright Information
|
11
11
|
//
|
12
|
-
// SciRuby is Copyright (c) 2010 -
|
13
|
-
// NMatrix is Copyright (c)
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
|
14
14
|
//
|
15
15
|
// Please see LICENSE.txt for additional copyright notices.
|
16
16
|
//
|
@@ -34,13 +34,12 @@
|
|
34
34
|
/*
|
35
35
|
* Project Includes
|
36
36
|
*/
|
37
|
-
|
38
|
-
#include "
|
39
|
-
#include "math/
|
40
|
-
#include "math/
|
41
|
-
#include "math/
|
42
|
-
#include "
|
43
|
-
#include "common.h"
|
37
|
+
#include "../../data/data.h"
|
38
|
+
#include "../../math/long_dtype.h"
|
39
|
+
#include "../../math/gemm.h"
|
40
|
+
#include "../../math/gemv.h"
|
41
|
+
#include "../../math/math.h"
|
42
|
+
#include "../common.h"
|
44
43
|
#include "dense.h"
|
45
44
|
|
46
45
|
/*
|
@@ -124,10 +123,13 @@ namespace nm { namespace dense_storage {
|
|
124
123
|
*/
|
125
124
|
template <typename D>
|
126
125
|
void set(VALUE left, SLICE* slice, VALUE right) {
|
126
|
+
NM_CONSERVATIVE(nm_register_value(left));
|
127
|
+
NM_CONSERVATIVE(nm_register_value(right));
|
128
|
+
|
127
129
|
DENSE_STORAGE* s = NM_STORAGE_DENSE(left);
|
128
130
|
|
129
131
|
std::pair<NMATRIX*,bool> nm_and_free =
|
130
|
-
interpret_arg_as_dense_nmatrix(right,
|
132
|
+
interpret_arg_as_dense_nmatrix(right, s->dtype);
|
131
133
|
|
132
134
|
// Map the data onto D* v.
|
133
135
|
D* v;
|
@@ -139,13 +141,20 @@ namespace nm { namespace dense_storage {
|
|
139
141
|
v_size = nm_storage_count_max_elements(t);
|
140
142
|
|
141
143
|
} else if (TYPE(right) == T_ARRAY) {
|
144
|
+
|
142
145
|
v_size = RARRAY_LEN(right);
|
143
|
-
v =
|
146
|
+
v = NM_ALLOC_N(D, v_size);
|
147
|
+
if (s->dtype == nm::RUBYOBJ)
|
148
|
+
nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
|
149
|
+
|
144
150
|
for (size_t m = 0; m < v_size; ++m) {
|
145
151
|
rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
|
146
152
|
}
|
153
|
+
|
147
154
|
} else {
|
148
155
|
v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
|
156
|
+
if (s->dtype == nm::RUBYOBJ)
|
157
|
+
nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
|
149
158
|
}
|
150
159
|
|
151
160
|
if (slice->single) {
|
@@ -156,10 +165,18 @@ namespace nm { namespace dense_storage {
|
|
156
165
|
}
|
157
166
|
|
158
167
|
// Only free v if it was allocated in this function.
|
159
|
-
if (nm_and_free.first
|
160
|
-
|
161
|
-
|
162
|
-
|
168
|
+
if (nm_and_free.first) {
|
169
|
+
if (nm_and_free.second) {
|
170
|
+
nm_delete(nm_and_free.first);
|
171
|
+
}
|
172
|
+
} else {
|
173
|
+
if (s->dtype == nm::RUBYOBJ)
|
174
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
|
175
|
+
NM_FREE(v);
|
176
|
+
}
|
177
|
+
NM_CONSERVATIVE(nm_unregister_value(left));
|
178
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
179
|
+
|
163
180
|
}
|
164
181
|
|
165
182
|
}} // end of namespace nm::dense_storage
|
@@ -186,13 +203,13 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
186
203
|
* check for that NULL pointer before freeing elements.
|
187
204
|
*/
|
188
205
|
static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
|
189
|
-
DENSE_STORAGE* s =
|
206
|
+
DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE );
|
190
207
|
|
191
208
|
s->dim = dim;
|
192
209
|
s->shape = shape;
|
193
210
|
s->dtype = dtype;
|
194
211
|
|
195
|
-
s->offset =
|
212
|
+
s->offset = NM_ALLOC_N(size_t, dim);
|
196
213
|
memset(s->offset, 0, sizeof(size_t)*dim);
|
197
214
|
|
198
215
|
s->stride = stride(shape, dim);
|
@@ -212,15 +229,24 @@ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* s
|
|
212
229
|
* elements is NULL, the new elements array will not be initialized.
|
213
230
|
*/
|
214
231
|
DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
|
232
|
+
if (dtype == nm::RUBYOBJ)
|
233
|
+
nm_register_values(reinterpret_cast<VALUE*>(elements), elements_length);
|
215
234
|
|
216
235
|
DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
|
217
236
|
size_t count = nm_storage_count_max_elements(s);
|
218
237
|
|
219
238
|
if (elements_length == count) {
|
220
|
-
|
239
|
+
s->elements = elements;
|
240
|
+
|
241
|
+
if (dtype == nm::RUBYOBJ)
|
242
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
|
221
243
|
|
222
244
|
} else {
|
223
|
-
|
245
|
+
|
246
|
+
s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count);
|
247
|
+
|
248
|
+
if (dtype == nm::RUBYOBJ)
|
249
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
|
224
250
|
|
225
251
|
size_t copy_length = elements_length;
|
226
252
|
|
@@ -236,7 +262,7 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
|
|
236
262
|
}
|
237
263
|
|
238
264
|
// Get rid of the init_val.
|
239
|
-
|
265
|
+
NM_FREE(elements);
|
240
266
|
}
|
241
267
|
}
|
242
268
|
|
@@ -252,12 +278,13 @@ void nm_dense_storage_delete(STORAGE* s) {
|
|
252
278
|
if (s) {
|
253
279
|
DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
|
254
280
|
if(storage->count-- == 1) {
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
if (storage->elements != NULL) // happens with dummy objects
|
259
|
-
|
260
|
-
|
281
|
+
NM_FREE(storage->shape);
|
282
|
+
NM_FREE(storage->offset);
|
283
|
+
NM_FREE(storage->stride);
|
284
|
+
if (storage->elements != NULL) {// happens with dummy objects
|
285
|
+
NM_FREE(storage->elements);
|
286
|
+
}
|
287
|
+
NM_FREE(storage);
|
261
288
|
}
|
262
289
|
}
|
263
290
|
}
|
@@ -270,9 +297,9 @@ void nm_dense_storage_delete_ref(STORAGE* s) {
|
|
270
297
|
if (s) {
|
271
298
|
DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
|
272
299
|
nm_dense_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
|
273
|
-
|
274
|
-
|
275
|
-
|
300
|
+
NM_FREE(storage->shape);
|
301
|
+
NM_FREE(storage->offset);
|
302
|
+
NM_FREE(storage);
|
276
303
|
}
|
277
304
|
}
|
278
305
|
|
@@ -286,14 +313,43 @@ void nm_dense_storage_mark(STORAGE* storage_base) {
|
|
286
313
|
if (storage && storage->dtype == nm::RUBYOBJ) {
|
287
314
|
VALUE* els = reinterpret_cast<VALUE*>(storage->elements);
|
288
315
|
|
289
|
-
|
290
|
-
|
316
|
+
if (els) {
|
317
|
+
rb_gc_mark_locations(els, &(els[nm_storage_count_max_elements(storage)-1]));
|
318
|
+
}
|
291
319
|
//for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) {
|
292
320
|
// rb_gc_mark(els[index]);
|
293
321
|
//}
|
294
322
|
}
|
295
323
|
}
|
296
324
|
|
325
|
+
/**
|
326
|
+
* Register a dense storage struct as in-use to avoid garbage collection of the
|
327
|
+
* elements stored.
|
328
|
+
*
|
329
|
+
* This function will check dtype and ignore non-object dtype, so it's safe to pass any dense storage in.
|
330
|
+
*
|
331
|
+
*/
|
332
|
+
void nm_dense_storage_register(const STORAGE* s) {
|
333
|
+
const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
|
334
|
+
if (storage->dtype == nm::RUBYOBJ && storage->elements) {
|
335
|
+
nm_register_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
|
336
|
+
}
|
337
|
+
}
|
338
|
+
|
339
|
+
/**
|
340
|
+
* Unregister a dense storage struct to allow normal garbage collection of the
|
341
|
+
* elements stored.
|
342
|
+
*
|
343
|
+
* This function will check dtype and ignore non-object dtype, so it's safe to pass any dense storage in.
|
344
|
+
*
|
345
|
+
*/
|
346
|
+
void nm_dense_storage_unregister(const STORAGE* s) {
|
347
|
+
const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
|
348
|
+
if (storage->dtype == nm::RUBYOBJ && storage->elements) {
|
349
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
|
350
|
+
}
|
351
|
+
}
|
352
|
+
|
297
353
|
///////////////
|
298
354
|
// Accessors //
|
299
355
|
///////////////
|
@@ -304,23 +360,30 @@ void nm_dense_storage_mark(STORAGE* storage_base) {
|
|
304
360
|
* map_pair iterator for dense matrices (for element-wise operations)
|
305
361
|
*/
|
306
362
|
VALUE nm_dense_map_pair(VALUE self, VALUE right) {
|
307
|
-
DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
|
308
|
-
*t = NM_STORAGE_DENSE(right);
|
309
363
|
|
364
|
+
NM_CONSERVATIVE(nm_register_value(self));
|
365
|
+
NM_CONSERVATIVE(nm_register_value(right));
|
366
|
+
|
367
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
368
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
369
|
+
NM_CONSERVATIVE(nm_unregister_value(self));
|
310
370
|
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
311
371
|
|
312
|
-
|
372
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
|
373
|
+
*t = NM_STORAGE_DENSE(right);
|
374
|
+
|
375
|
+
size_t* coords = NM_ALLOCA_N(size_t, s->dim);
|
313
376
|
memset(coords, 0, sizeof(size_t) * s->dim);
|
314
377
|
|
315
|
-
size_t *shape_copy =
|
378
|
+
size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
|
316
379
|
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
317
380
|
|
318
381
|
size_t count = nm_storage_count_max_elements(s);
|
319
382
|
|
320
383
|
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
321
|
-
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
322
384
|
|
323
|
-
|
385
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
386
|
+
nm_dense_storage_register(result);
|
324
387
|
|
325
388
|
for (size_t k = 0; k < count; ++k) {
|
326
389
|
nm_dense_storage_coords(result, k, coords);
|
@@ -328,17 +391,23 @@ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
|
|
328
391
|
t_index = nm_dense_storage_pos(t, coords);
|
329
392
|
|
330
393
|
VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
|
394
|
+
nm_register_value(sval);
|
331
395
|
VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
|
332
|
-
|
333
396
|
result_elem[k] = rb_yield_values(2, sval, tval);
|
397
|
+
nm_unregister_value(sval);
|
334
398
|
}
|
335
399
|
|
400
|
+
VALUE klass = CLASS_OF(self);
|
336
401
|
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
337
|
-
|
402
|
+
nm_register_nmatrix(m);
|
403
|
+
VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
|
338
404
|
|
339
|
-
|
405
|
+
nm_unregister_nmatrix(m);
|
406
|
+
nm_dense_storage_unregister(result);
|
407
|
+
NM_CONSERVATIVE(nm_unregister_value(self));
|
408
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
340
409
|
|
341
|
-
return
|
410
|
+
return to_return;
|
342
411
|
|
343
412
|
}
|
344
413
|
|
@@ -346,22 +415,28 @@ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
|
|
346
415
|
* map enumerator for dense matrices.
|
347
416
|
*/
|
348
417
|
VALUE nm_dense_map(VALUE self) {
|
349
|
-
DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
|
350
418
|
|
419
|
+
NM_CONSERVATIVE(nm_register_value(self));
|
420
|
+
|
421
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
422
|
+
NM_CONSERVATIVE(nm_unregister_value(self));
|
351
423
|
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
352
424
|
|
353
|
-
|
425
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
|
426
|
+
|
427
|
+
size_t* coords = NM_ALLOCA_N(size_t, s->dim);
|
354
428
|
memset(coords, 0, sizeof(size_t) * s->dim);
|
355
429
|
|
356
|
-
size_t *shape_copy =
|
430
|
+
size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
|
357
431
|
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
358
432
|
|
359
433
|
size_t count = nm_storage_count_max_elements(s);
|
360
434
|
|
361
435
|
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
436
|
+
|
362
437
|
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
363
438
|
|
364
|
-
|
439
|
+
nm_dense_storage_register(result);
|
365
440
|
|
366
441
|
for (size_t k = 0; k < count; ++k) {
|
367
442
|
nm_dense_storage_coords(result, k, coords);
|
@@ -370,13 +445,18 @@ VALUE nm_dense_map(VALUE self) {
|
|
370
445
|
result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
|
371
446
|
}
|
372
447
|
|
448
|
+
VALUE klass = CLASS_OF(self);
|
449
|
+
|
373
450
|
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
374
|
-
|
451
|
+
nm_register_nmatrix(m);
|
375
452
|
|
376
|
-
|
453
|
+
VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
|
377
454
|
|
378
|
-
|
455
|
+
nm_unregister_nmatrix(m);
|
456
|
+
nm_dense_storage_unregister(result);
|
457
|
+
NM_CONSERVATIVE(nm_unregister_value(self));
|
379
458
|
|
459
|
+
return to_return;
|
380
460
|
}
|
381
461
|
|
382
462
|
|
@@ -384,18 +464,20 @@ VALUE nm_dense_map(VALUE self) {
|
|
384
464
|
* each_with_indices iterator for dense matrices.
|
385
465
|
*/
|
386
466
|
VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
387
|
-
volatile VALUE nm = nmatrix;
|
388
|
-
|
389
|
-
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
390
467
|
|
391
|
-
|
468
|
+
NM_CONSERVATIVE(nm_register_value(nmatrix));
|
469
|
+
|
470
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
471
|
+
NM_CONSERVATIVE(nm_unregister_value(nmatrix));
|
472
|
+
RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
|
473
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
|
392
474
|
|
393
475
|
// Create indices and initialize them to zero
|
394
|
-
size_t* coords =
|
476
|
+
size_t* coords = NM_ALLOCA_N(size_t, s->dim);
|
395
477
|
memset(coords, 0, sizeof(size_t) * s->dim);
|
396
478
|
|
397
479
|
size_t slice_index;
|
398
|
-
size_t* shape_copy =
|
480
|
+
size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
|
399
481
|
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
400
482
|
|
401
483
|
DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
|
@@ -404,8 +486,9 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
|
404
486
|
nm_dense_storage_coords(sliced_dummy, k, coords);
|
405
487
|
slice_index = nm_dense_storage_pos(s, coords);
|
406
488
|
VALUE ary = rb_ary_new();
|
407
|
-
|
408
|
-
|
489
|
+
nm_register_value(ary);
|
490
|
+
if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
|
491
|
+
else rb_ary_push(ary, rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);
|
409
492
|
|
410
493
|
for (size_t p = 0; p < s->dim; ++p) {
|
411
494
|
rb_ary_push(ary, INT2FIX(coords[p]));
|
@@ -413,11 +496,13 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
|
413
496
|
|
414
497
|
// yield the array which now consists of the value and the indices
|
415
498
|
rb_yield(ary);
|
416
|
-
|
499
|
+
nm_unregister_value(ary);
|
417
500
|
}
|
418
501
|
|
419
502
|
nm_dense_storage_delete(sliced_dummy);
|
420
503
|
|
504
|
+
NM_CONSERVATIVE(nm_unregister_value(nmatrix));
|
505
|
+
|
421
506
|
return nmatrix;
|
422
507
|
|
423
508
|
}
|
@@ -431,18 +516,22 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
|
431
516
|
* containing other types of data.
|
432
517
|
*/
|
433
518
|
VALUE nm_dense_each(VALUE nmatrix) {
|
434
|
-
volatile VALUE nm = nmatrix; // Not sure this actually does anything.
|
435
|
-
DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
|
436
519
|
|
437
|
-
|
520
|
+
NM_CONSERVATIVE(nm_register_value(nmatrix));
|
521
|
+
|
522
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
523
|
+
NM_CONSERVATIVE(nm_unregister_value(nmatrix));
|
524
|
+
RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);
|
525
|
+
|
526
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
|
438
527
|
|
439
|
-
size_t* temp_coords =
|
528
|
+
size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
|
440
529
|
size_t sliced_index;
|
441
|
-
size_t* shape_copy =
|
530
|
+
size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
|
442
531
|
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
443
532
|
DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
|
444
533
|
|
445
|
-
if (NM_DTYPE(
|
534
|
+
if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) {
|
446
535
|
|
447
536
|
// matrix of Ruby objects -- yield those objects directly
|
448
537
|
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
@@ -458,12 +547,13 @@ VALUE nm_dense_each(VALUE nmatrix) {
|
|
458
547
|
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
459
548
|
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
460
549
|
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
461
|
-
VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(
|
550
|
+
VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
|
462
551
|
rb_yield( v ); // yield to the copy we made
|
463
552
|
}
|
464
553
|
}
|
465
554
|
|
466
555
|
nm_dense_storage_delete(sliced_dummy);
|
556
|
+
NM_CONSERVATIVE(nm_unregister_value(nmatrix));
|
467
557
|
|
468
558
|
return nmatrix;
|
469
559
|
|
@@ -487,11 +577,11 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
|
|
487
577
|
*/
|
488
578
|
void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
|
489
579
|
DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
|
490
|
-
|
491
580
|
if (slice->single)
|
492
581
|
return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
|
493
582
|
else {
|
494
|
-
|
583
|
+
nm_dense_storage_register(s);
|
584
|
+
size_t *shape = NM_ALLOC_N(size_t, s->dim);
|
495
585
|
for (size_t i = 0; i < s->dim; ++i) {
|
496
586
|
shape[i] = slice->lengths[i];
|
497
587
|
}
|
@@ -505,6 +595,7 @@ void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
|
|
505
595
|
nm_dense_storage_pos(s, slice->coords),
|
506
596
|
0);
|
507
597
|
|
598
|
+
nm_dense_storage_unregister(s);
|
508
599
|
return ns;
|
509
600
|
}
|
510
601
|
}
|
@@ -521,11 +612,12 @@ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
|
|
521
612
|
return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
|
522
613
|
|
523
614
|
else {
|
524
|
-
|
615
|
+
nm_dense_storage_register(s);
|
616
|
+
DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE );
|
525
617
|
ns->dim = s->dim;
|
526
618
|
ns->dtype = s->dtype;
|
527
|
-
ns->offset =
|
528
|
-
ns->shape =
|
619
|
+
ns->offset = NM_ALLOC_N(size_t, ns->dim);
|
620
|
+
ns->shape = NM_ALLOC_N(size_t, ns->dim);
|
529
621
|
|
530
622
|
for (size_t i = 0; i < ns->dim; ++i) {
|
531
623
|
ns->offset[i] = slice->coords[i] + s->offset[i];
|
@@ -538,6 +630,7 @@ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
|
|
538
630
|
s->src->count++;
|
539
631
|
ns->src = s->src;
|
540
632
|
|
633
|
+
nm_dense_storage_unregister(s);
|
541
634
|
return ns;
|
542
635
|
}
|
543
636
|
}
|
@@ -550,8 +643,8 @@ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
|
|
550
643
|
*/
|
551
644
|
void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
|
552
645
|
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE)
|
553
|
-
|
554
|
-
ttable[
|
646
|
+
nm::dtype_t dtype = NM_DTYPE(left);
|
647
|
+
ttable[dtype](left, slice, right);
|
555
648
|
}
|
556
649
|
|
557
650
|
|
@@ -567,7 +660,7 @@ void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
|
|
567
660
|
* have the same dtype.
|
568
661
|
*/
|
569
662
|
bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
570
|
-
|
663
|
+
LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
|
571
664
|
|
572
665
|
if (!ttable[left->dtype][right->dtype]) {
|
573
666
|
rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
|
@@ -657,7 +750,7 @@ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, siz
|
|
657
750
|
*/
|
658
751
|
static size_t* stride(size_t* shape, size_t dim) {
|
659
752
|
size_t i, j;
|
660
|
-
size_t* stride =
|
753
|
+
size_t* stride = NM_ALLOC_N(size_t, dim);
|
661
754
|
|
662
755
|
for (i = 0; i < dim; ++i) {
|
663
756
|
stride[i] = 1;
|
@@ -678,22 +771,24 @@ static size_t* stride(size_t* shape, size_t dim) {
|
|
678
771
|
* Copy dense storage, changing dtype if necessary.
|
679
772
|
*/
|
680
773
|
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
|
681
|
-
|
774
|
+
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
682
775
|
|
683
776
|
if (!ttable[new_dtype][rhs->dtype]) {
|
684
777
|
rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
|
685
778
|
return NULL;
|
686
779
|
}
|
687
780
|
|
688
|
-
|
781
|
+
return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
|
689
782
|
}
|
690
783
|
|
691
784
|
/*
|
692
785
|
* Copy dense storage without a change in dtype.
|
693
786
|
*/
|
694
787
|
DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
788
|
+
nm_dense_storage_register(rhs);
|
789
|
+
|
695
790
|
size_t count = 0;
|
696
|
-
size_t *shape =
|
791
|
+
size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
|
697
792
|
|
698
793
|
// copy shape and offset
|
699
794
|
for (size_t i = 0; i < rhs->dim; ++i) {
|
@@ -709,7 +804,8 @@ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
|
709
804
|
if (rhs == rhs->src) // not a reference
|
710
805
|
memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count);
|
711
806
|
else { // slice whole matrix
|
712
|
-
|
807
|
+
nm_dense_storage_register(lhs);
|
808
|
+
size_t *offset = NM_ALLOC_N(size_t, rhs->dim);
|
713
809
|
memset(offset, 0, sizeof(size_t) * rhs->dim);
|
714
810
|
|
715
811
|
slice_copy(lhs,
|
@@ -718,9 +814,13 @@ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
|
718
814
|
0,
|
719
815
|
nm_dense_storage_pos(rhs, offset),
|
720
816
|
0);
|
817
|
+
|
818
|
+
nm_dense_storage_unregister(lhs);
|
721
819
|
}
|
722
820
|
}
|
723
821
|
|
822
|
+
nm_dense_storage_unregister(rhs);
|
823
|
+
|
724
824
|
return lhs;
|
725
825
|
}
|
726
826
|
|
@@ -733,7 +833,9 @@ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
|
733
833
|
STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
|
734
834
|
DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base;
|
735
835
|
|
736
|
-
|
836
|
+
nm_dense_storage_register(rhs);
|
837
|
+
|
838
|
+
size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
|
737
839
|
|
738
840
|
// swap shape and offset
|
739
841
|
shape[0] = rhs->shape[1];
|
@@ -743,17 +845,25 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
|
|
743
845
|
lhs->offset[0] = rhs->offset[1];
|
744
846
|
lhs->offset[1] = rhs->offset[0];
|
745
847
|
|
848
|
+
nm_dense_storage_register(lhs);
|
849
|
+
|
746
850
|
if (rhs_base->src == rhs_base) {
|
747
851
|
nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
|
748
852
|
} else {
|
749
853
|
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
750
854
|
|
751
|
-
if (!ttable[lhs->dtype][rhs->dtype])
|
855
|
+
if (!ttable[lhs->dtype][rhs->dtype]) {
|
856
|
+
nm_dense_storage_unregister(rhs);
|
857
|
+
nm_dense_storage_unregister(lhs);
|
752
858
|
rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
|
859
|
+
}
|
753
860
|
|
754
861
|
ttable[lhs->dtype][rhs->dtype](rhs, lhs);
|
755
862
|
}
|
756
863
|
|
864
|
+
nm_dense_storage_unregister(rhs);
|
865
|
+
nm_dense_storage_unregister(lhs);
|
866
|
+
|
757
867
|
return (STORAGE*)lhs;
|
758
868
|
}
|
759
869
|
|
@@ -768,21 +878,26 @@ namespace nm {
|
|
768
878
|
* Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it.
|
769
879
|
*/
|
770
880
|
std::pair<NMATRIX*,bool> interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
|
881
|
+
NM_CONSERVATIVE(nm_register_value(right));
|
771
882
|
if (TYPE(right) == T_DATA && (RDATA(right)->dfree == (RUBY_DATA_FUNC)nm_delete || RDATA(right)->dfree == (RUBY_DATA_FUNC)nm_delete_ref)) {
|
772
883
|
NMATRIX *r;
|
773
884
|
if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
|
774
885
|
UnwrapNMatrix( right, r );
|
775
886
|
NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
|
887
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
776
888
|
return std::make_pair(ldtype_r,true);
|
777
889
|
} else { // simple case -- right-hand matrix is dense and is not a reference and has same dtype
|
778
890
|
UnwrapNMatrix( right, r );
|
891
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
779
892
|
return std::make_pair(r, false);
|
780
893
|
}
|
781
894
|
// Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
|
782
895
|
} else if (TYPE(right) == T_DATA) {
|
896
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
783
897
|
rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
|
784
898
|
}
|
785
899
|
|
900
|
+
NM_CONSERVATIVE(nm_unregister_value(right));
|
786
901
|
return std::make_pair<NMATRIX*,bool>(NULL, false);
|
787
902
|
}
|
788
903
|
|
@@ -796,11 +911,14 @@ namespace dense_storage {
|
|
796
911
|
template<typename LDType, typename RDType>
|
797
912
|
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
|
798
913
|
|
914
|
+
nm_dense_storage_register(rhs);
|
915
|
+
nm_dense_storage_register(lhs);
|
916
|
+
|
799
917
|
LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
|
800
918
|
RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
|
801
919
|
|
802
920
|
size_t count = nm_storage_count_max_elements(lhs);
|
803
|
-
size_t* temp_coords =
|
921
|
+
size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
|
804
922
|
size_t coord_swap_temp;
|
805
923
|
|
806
924
|
while (count-- > 0) {
|
@@ -810,21 +928,28 @@ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
|
|
810
928
|
lhs_els[count] = rhs_els[r_coord];
|
811
929
|
}
|
812
930
|
|
931
|
+
nm_dense_storage_unregister(rhs);
|
932
|
+
nm_dense_storage_unregister(lhs);
|
933
|
+
|
813
934
|
}
|
814
935
|
|
815
936
|
template <typename LDType, typename RDType>
|
816
937
|
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
938
|
+
nm_dense_storage_register(rhs);
|
939
|
+
|
817
940
|
size_t count = nm_storage_count_max_elements(rhs);
|
818
941
|
|
819
|
-
size_t *shape =
|
942
|
+
size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
|
820
943
|
memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);
|
821
944
|
|
822
|
-
DENSE_STORAGE* lhs
|
945
|
+
DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);
|
946
|
+
|
947
|
+
nm_dense_storage_register(lhs);
|
823
948
|
|
824
949
|
// Ensure that allocation worked before copying.
|
825
950
|
if (lhs && count) {
|
826
951
|
if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
|
827
|
-
size_t* offset =
|
952
|
+
size_t* offset = NM_ALLOCA_N(size_t, rhs->dim);
|
828
953
|
memset(offset, 0, sizeof(size_t) * rhs->dim);
|
829
954
|
|
830
955
|
slice_copy(lhs, reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
|
@@ -832,54 +957,72 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
|
832
957
|
nm_dense_storage_pos(rhs, offset), 0);
|
833
958
|
|
834
959
|
} else { // Make a regular copy.
|
835
|
-
RDType*
|
836
|
-
LDType* lhs_els
|
960
|
+
RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
|
961
|
+
LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
|
837
962
|
|
838
|
-
|
963
|
+
for (size_t i = 0; i < count; ++i)
|
964
|
+
lhs_els[i] = rhs_els[i];
|
839
965
|
}
|
840
966
|
}
|
841
967
|
|
968
|
+
nm_dense_storage_unregister(rhs);
|
969
|
+
nm_dense_storage_unregister(lhs);
|
970
|
+
|
842
971
|
return lhs;
|
843
972
|
}
|
844
973
|
|
845
974
|
template <typename LDType, typename RDType>
|
846
975
|
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
976
|
+
nm_dense_storage_register(left);
|
977
|
+
nm_dense_storage_register(right);
|
978
|
+
|
847
979
|
size_t index;
|
848
980
|
DENSE_STORAGE *tmp1, *tmp2;
|
849
981
|
tmp1 = NULL; tmp2 = NULL;
|
850
982
|
bool result = true;
|
851
983
|
/* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */
|
852
|
-
if (left->dim != right->dim)
|
853
|
-
|
984
|
+
if (left->dim != right->dim) {
|
985
|
+
nm_dense_storage_unregister(right);
|
986
|
+
nm_dense_storage_unregister(left);
|
987
|
+
return false;
|
988
|
+
}
|
854
989
|
|
855
|
-
|
856
|
-
RDType* right_elements
|
990
|
+
LDType* left_elements = (LDType*)left->elements;
|
991
|
+
RDType* right_elements = (RDType*)right->elements;
|
857
992
|
|
858
993
|
// Copy elements in temp matrix if you have reference to the right.
|
859
994
|
if (left->src != left) {
|
860
995
|
tmp1 = nm_dense_storage_copy(left);
|
996
|
+
nm_dense_storage_register(tmp1);
|
861
997
|
left_elements = (LDType*)tmp1->elements;
|
862
998
|
}
|
863
999
|
if (right->src != right) {
|
864
1000
|
tmp2 = nm_dense_storage_copy(right);
|
1001
|
+
nm_dense_storage_register(tmp2);
|
865
1002
|
right_elements = (RDType*)tmp2->elements;
|
866
1003
|
}
|
867
1004
|
|
868
1005
|
|
869
1006
|
|
870
|
-
|
871
|
-
|
1007
|
+
for (index = nm_storage_count_max_elements(left); index-- > 0;) {
|
1008
|
+
if (left_elements[index] != right_elements[index]) {
|
872
1009
|
result = false;
|
873
1010
|
break;
|
874
1011
|
}
|
875
|
-
|
1012
|
+
}
|
876
1013
|
|
877
|
-
if (tmp1)
|
878
|
-
|
879
|
-
|
880
|
-
|
1014
|
+
if (tmp1) {
|
1015
|
+
nm_dense_storage_unregister(tmp1);
|
1016
|
+
NM_FREE(tmp1);
|
1017
|
+
}
|
1018
|
+
if (tmp2) {
|
1019
|
+
nm_dense_storage_unregister(tmp2);
|
1020
|
+
NM_FREE(tmp2);
|
1021
|
+
}
|
881
1022
|
|
882
|
-
|
1023
|
+
nm_dense_storage_unregister(left);
|
1024
|
+
nm_dense_storage_unregister(right);
|
1025
|
+
return result;
|
883
1026
|
}
|
884
1027
|
|
885
1028
|
template <typename DType>
|
@@ -929,11 +1072,16 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
|
|
929
1072
|
DENSE_STORAGE *left = (DENSE_STORAGE*)(casted_storage.left),
|
930
1073
|
*right = (DENSE_STORAGE*)(casted_storage.right);
|
931
1074
|
|
1075
|
+
nm_dense_storage_register(left);
|
1076
|
+
nm_dense_storage_register(right);
|
1077
|
+
|
932
1078
|
// Create result storage.
|
933
1079
|
DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);
|
934
1080
|
|
935
|
-
|
936
|
-
|
1081
|
+
nm_dense_storage_register(result);
|
1082
|
+
|
1083
|
+
DType *pAlpha = NM_ALLOCA_N(DType, 1),
|
1084
|
+
*pBeta = NM_ALLOCA_N(DType, 1);
|
937
1085
|
|
938
1086
|
*pAlpha = 1;
|
939
1087
|
*pBeta = 0;
|
@@ -947,6 +1095,11 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
|
|
947
1095
|
reinterpret_cast<DType*>(right->elements), right->shape[1], pBeta,
|
948
1096
|
reinterpret_cast<DType*>(result->elements), result->shape[1]);
|
949
1097
|
|
1098
|
+
|
1099
|
+
nm_dense_storage_unregister(left);
|
1100
|
+
nm_dense_storage_unregister(right);
|
1101
|
+
nm_dense_storage_unregister(result);
|
1102
|
+
|
950
1103
|
return result;
|
951
1104
|
}
|
952
1105
|
|