nmatrix 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gitignore +3 -0
  2. data/CONTRIBUTING.md +66 -0
  3. data/Gemfile +1 -1
  4. data/History.txt +68 -10
  5. data/LICENSE.txt +2 -2
  6. data/Manifest.txt +2 -0
  7. data/README.rdoc +90 -69
  8. data/Rakefile +18 -9
  9. data/ext/nmatrix/data/complex.h +7 -7
  10. data/ext/nmatrix/data/data.cpp +2 -7
  11. data/ext/nmatrix/data/data.h +7 -4
  12. data/ext/nmatrix/data/rational.h +2 -2
  13. data/ext/nmatrix/data/ruby_object.h +3 -10
  14. data/ext/nmatrix/extconf.rb +79 -54
  15. data/ext/nmatrix/new_extconf.rb +11 -12
  16. data/ext/nmatrix/nmatrix.cpp +94 -125
  17. data/ext/nmatrix/nmatrix.h +38 -17
  18. data/ext/nmatrix/ruby_constants.cpp +2 -15
  19. data/ext/nmatrix/ruby_constants.h +2 -14
  20. data/ext/nmatrix/storage/common.cpp +2 -2
  21. data/ext/nmatrix/storage/common.h +2 -2
  22. data/ext/nmatrix/storage/dense.cpp +206 -31
  23. data/ext/nmatrix/storage/dense.h +5 -2
  24. data/ext/nmatrix/storage/list.cpp +52 -4
  25. data/ext/nmatrix/storage/list.h +3 -2
  26. data/ext/nmatrix/storage/storage.cpp +6 -6
  27. data/ext/nmatrix/storage/storage.h +2 -2
  28. data/ext/nmatrix/storage/yale.cpp +202 -49
  29. data/ext/nmatrix/storage/yale.h +5 -4
  30. data/ext/nmatrix/ttable_helper.rb +108 -108
  31. data/ext/nmatrix/types.h +2 -15
  32. data/ext/nmatrix/util/io.cpp +2 -2
  33. data/ext/nmatrix/util/io.h +2 -2
  34. data/ext/nmatrix/util/lapack.h +2 -2
  35. data/ext/nmatrix/util/math.cpp +14 -14
  36. data/ext/nmatrix/util/math.h +2 -2
  37. data/ext/nmatrix/util/sl_list.cpp +2 -2
  38. data/ext/nmatrix/util/sl_list.h +2 -2
  39. data/ext/nmatrix/util/util.h +2 -2
  40. data/lib/nmatrix.rb +13 -35
  41. data/lib/nmatrix/blas.rb +182 -56
  42. data/lib/nmatrix/io/market.rb +38 -14
  43. data/lib/nmatrix/io/mat5_reader.rb +393 -278
  44. data/lib/nmatrix/io/mat_reader.rb +121 -107
  45. data/lib/nmatrix/lapack.rb +59 -14
  46. data/lib/nmatrix/monkeys.rb +32 -30
  47. data/lib/nmatrix/nmatrix.rb +204 -100
  48. data/lib/nmatrix/nvector.rb +166 -57
  49. data/lib/nmatrix/shortcuts.rb +364 -231
  50. data/lib/nmatrix/version.rb +8 -4
  51. data/nmatrix.gemspec +5 -3
  52. data/scripts/mac-brew-gcc.sh +1 -1
  53. data/spec/blas_spec.rb +80 -2
  54. data/spec/math_spec.rb +78 -32
  55. data/spec/nmatrix_list_spec.rb +55 -55
  56. data/spec/nmatrix_spec.rb +60 -117
  57. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  58. data/spec/nmatrix_yale_spec.rb +214 -198
  59. data/spec/nvector_spec.rb +58 -2
  60. data/spec/shortcuts_spec.rb +156 -32
  61. data/spec/slice_spec.rb +229 -178
  62. data/spec/spec_helper.rb +2 -2
  63. metadata +71 -21
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -42,8 +42,6 @@
42
42
  #include <string.h>
43
43
  #endif
44
44
 
45
-
46
-
47
45
  #ifdef BENCHMARK
48
46
  // SOURCE: http://stackoverflow.com/questions/2349776/how-can-i-benchmark-a-c-program-easily
49
47
  #ifdef __cplusplus
@@ -55,16 +53,10 @@
55
53
  #endif
56
54
  #endif
57
55
 
58
- /*
59
- * Project Includes
60
- */
61
-
62
-
63
56
  /*
64
57
  * Macros
65
58
  */
66
59
 
67
-
68
60
  #define RUBY_ZERO INT2FIX(0)
69
61
 
70
62
  #ifndef SIZEOF_INT
@@ -112,6 +104,33 @@
112
104
 
113
105
  #ifdef __cplusplus /* These are the C++ versions of the macros. */
114
106
 
107
+ /*
108
+ * If no block is given, return an enumerator. This is copied straight out of Ruby's include/ruby/intern.h.
109
+ *
110
+ * rb_enumeratorize is located in enumerator.c.
111
+ *
112
+ * VALUE rb_enumeratorize(VALUE obj, VALUE meth, int argc, VALUE *argv) {
113
+ * return enumerator_init(enumerator_allocate(rb_cEnumerator), obj, meth, argc, argv);
114
+ * }
115
+ */
116
+ #ifdef RUBY_2
117
+ #ifndef RETURN_SIZED_ENUMERATOR
118
+ #undef RETURN_SIZED_ENUMERATOR
119
+ // Ruby 2.0 and higher has rb_enumeratorize_with_size instead of rb_enumeratorize.
120
+ // We want to support both in the simplest way possible.
121
+ #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \
122
+ if (!rb_block_given_p()) \
123
+ return rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), (argc), (argv), (size_fn)); \
124
+ } while (0)
125
+ #endif
126
+ #else
127
+ #undef RETURN_SIZED_ENUMERATOR
128
+ #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \
129
+ if (!rb_block_given_p()) \
130
+ return rb_enumeratorize((obj), ID2SYM(rb_frame_this_func()), (argc), (argv)); \
131
+ } while (0)
132
+ #endif
133
+
115
134
  #define NM_DECL_ENUM(enum_type, name) nm::enum_type name
116
135
  #define NM_DECL_STRUCT(type, name) type name;
117
136
 
@@ -144,7 +163,7 @@
144
163
  #else /* These are the C versions of the macros. */
145
164
 
146
165
  #define NM_DECL_ENUM(enum_type, name) nm_ ## enum_type name
147
- #define NM_DECL_STRUCT(type, name) NM_ ## type name;
166
+ #define NM_DECL_STRUCT(type, name) struct NM_ ## type name;
148
167
 
149
168
  #define NM_DEF_STORAGE_ELEMENTS \
150
169
  NM_DECL_ENUM(dtype_t, dtype); \
@@ -313,28 +332,30 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
313
332
  (rb_obj_is_kind_of(obj, cNVector) == Qtrue)
314
333
 
315
334
 
335
+ #ifdef __cplusplus
316
336
  typedef VALUE (*METHOD)(...);
317
337
 
318
- #ifdef __cplusplus
319
338
  //}; // end of namespace nm
320
339
  #endif
321
340
 
322
- /*
323
- * Data
324
- */
325
-
326
341
  /*
327
342
  * Functions
328
343
  */
329
344
 
345
+ #ifdef __cplusplus
330
346
  extern "C" {
347
+ #endif
348
+
331
349
  void Init_nmatrix();
332
-
350
+
333
351
  // External API
334
352
  VALUE rb_nmatrix_dense_create(NM_DECL_ENUM(dtype_t, dtype), size_t* shape, size_t dim, void* elements, size_t length);
335
353
  VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
336
354
 
337
355
  NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
356
+
357
+ #ifdef __cplusplus
338
358
  }
359
+ #endif
339
360
 
340
361
  #endif // NMATRIX_H
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -31,14 +31,6 @@
31
31
 
32
32
  #include <ruby.h>
33
33
 
34
- /*
35
- * Project Includes
36
- */
37
-
38
- /*
39
- * Macros
40
- */
41
-
42
34
  /*
43
35
  * Global Variables
44
36
  */
@@ -89,10 +81,6 @@ VALUE cNMatrix,
89
81
  nm_eDataTypeError,
90
82
  nm_eStorageTypeError;
91
83
 
92
- /*
93
- * Forward Declarations
94
- */
95
-
96
84
  /*
97
85
  * Functions
98
86
  */
@@ -135,4 +123,3 @@ void nm_init_ruby_constants(void) {
135
123
  nm_rb_unit = rb_intern("unit");
136
124
  nm_rb_nonunit = rb_intern("nonunit");
137
125
  }
138
-
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -34,18 +34,6 @@
34
34
 
35
35
  #include <ruby.h>
36
36
 
37
- /*
38
- * Project Includes
39
- */
40
-
41
- /*
42
- * Macros
43
- */
44
-
45
- /*
46
- * Types
47
- */
48
-
49
37
  /*
50
38
  * Data
51
39
  */
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -55,6 +55,9 @@
55
55
 
56
56
  namespace nm { namespace dense_storage {
57
57
 
58
+ template<typename LDType, typename RDType>
59
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
60
+
58
61
  template <typename LDType, typename RDType>
59
62
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
60
63
 
@@ -89,13 +92,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
89
92
  // Lifecycle //
90
93
  ///////////////
91
94
 
95
+
92
96
  /*
93
- * Note that elements and elements_length are for initial value(s) passed in.
94
- * If they are the correct length, they will be used directly. If not, they
95
- * will be concatenated over and over again into a new elements array. If
96
- * elements is NULL, the new elements array will not be initialized.
97
+ * This creates a dummy with all the properties of dense storage, but no actual elements allocation.
98
+ *
99
+ * elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
100
+ * check for that NULL pointer before freeing elements.
97
101
  */
98
- DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
102
+ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
99
103
  DENSE_STORAGE* s = ALLOC( DENSE_STORAGE );
100
104
 
101
105
  s->dim = dim;
@@ -108,12 +112,27 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
108
112
  s->stride = stride(shape, dim);
109
113
  s->count = 1;
110
114
  s->src = s;
111
-
112
- size_t count = nm_storage_count_max_elements(s);
115
+
116
+ s->elements = NULL;
117
+
118
+ return s;
119
+ }
120
+
121
+
122
+ /*
123
+ * Note that elements and elements_length are for initial value(s) passed in.
124
+ * If they are the correct length, they will be used directly. If not, they
125
+ * will be concatenated over and over again into a new elements array. If
126
+ * elements is NULL, the new elements array will not be initialized.
127
+ */
128
+ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
129
+
130
+ DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
131
+ size_t count = nm_storage_count_max_elements(s);
113
132
 
114
133
  if (elements_length == count) {
115
134
  s->elements = elements;
116
-
135
+
117
136
  } else {
118
137
  s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
119
138
 
@@ -138,8 +157,9 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
138
157
  return s;
139
158
  }
140
159
 
160
+
141
161
  /*
142
- * Destructor for dense storage
162
+ * Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
143
163
  */
144
164
  void nm_dense_storage_delete(STORAGE* s) {
145
165
  // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
@@ -149,7 +169,8 @@ void nm_dense_storage_delete(STORAGE* s) {
149
169
  free(storage->shape);
150
170
  free(storage->offset);
151
171
  free(storage->stride);
152
- free(storage->elements);
172
+ if (storage->elements != NULL) // happens with dummy objects
173
+ free(storage->elements);
153
174
  free(storage);
154
175
  }
155
176
  }
@@ -188,6 +209,101 @@ void nm_dense_storage_mark(void* storage_base) {
188
209
  // Accessors //
189
210
  ///////////////
190
211
 
212
+
213
+ // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
214
+ // the matrix's storage.
215
+ static VALUE nm_dense_enumerator_length(VALUE nmatrix) {
216
+ long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
217
+ return LONG2NUM(len);
218
+ }
219
+
220
+
221
+ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
222
+ volatile VALUE nm = nmatrix;
223
+
224
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
225
+
226
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_dense_enumerator_length); // fourth argument only used by Ruby2+
227
+
228
+ // Create indices and initialize them to zero
229
+ size_t* coords = ALLOCA_N(size_t, s->dim);
230
+ memset(coords, 0, sizeof(size_t) * s->dim);
231
+
232
+ size_t slice_index;
233
+ size_t* shape_copy = ALLOC_N(size_t, s->dim);
234
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
235
+
236
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
237
+
238
+ for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
239
+ nm_dense_storage_coords(sliced_dummy, k, coords);
240
+ slice_index = nm_dense_storage_pos(s, coords);
241
+ VALUE ary = rb_ary_new();
242
+ if (NM_DTYPE(nm) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
243
+ else rb_ary_push(ary, rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval);
244
+
245
+ for (size_t p = 0; p < s->dim; ++p) {
246
+ rb_ary_push(ary, INT2FIX(coords[p]));
247
+ }
248
+
249
+ // yield the array which now consists of the value and the indices
250
+ rb_yield(ary);
251
+
252
+ }
253
+
254
+ nm_dense_storage_delete(sliced_dummy);
255
+
256
+ return Qnil;
257
+
258
+ }
259
+
260
+
261
+ /*
262
+ * Borrowed this function from NArray. Handles 'each' iteration on a dense
263
+ * matrix.
264
+ *
265
+ * Additionally, handles separately matrices containing VALUEs and matrices
266
+ * containing other types of data.
267
+ */
268
+ VALUE nm_dense_each(VALUE nmatrix) {
269
+ volatile VALUE nm = nmatrix; // Not sure this actually does anything.
270
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
271
+
272
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_storage_count_max_elements(s));
273
+
274
+ size_t* temp_coords = ALLOCA_N(size_t, s->dim);
275
+ size_t sliced_index;
276
+ size_t* shape_copy = ALLOC_N(size_t, s->dim);
277
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
278
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
279
+
280
+ if (NM_DTYPE(nm) == nm::RUBYOBJ) {
281
+
282
+ // matrix of Ruby objects -- yield those objects directly
283
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
284
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
285
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
286
+ rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
287
+
288
+ } else {
289
+
290
+ // We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way the user can't accidentally
291
+ // modify it and cause a seg fault.
292
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
293
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
294
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
295
+ VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval;
296
+ rb_yield( v ); // yield to the copy we made
297
+ }
298
+ }
299
+
300
+ nm_dense_storage_delete(sliced_dummy);
301
+
302
+ return Qnil;
303
+
304
+ }
305
+
306
+
191
307
  /*
192
308
  * Get a slice or one element, using copying.
193
309
  *
@@ -348,6 +464,25 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
348
464
  pos += (coords[i] + s->offset[i]) * s->stride[i];
349
465
 
350
466
  return pos;
467
+
468
+ }
469
+
470
+ /*
471
+ * Determine a set of slice coordinates from the linear array position (in elements
472
+ * of s) of some set of coordinates (given by slice). (Inverse of
473
+ * nm_dense_storage_pos).
474
+ *
475
+ * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
476
+ */
477
+ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {
478
+
479
+ size_t temp_pos = slice_pos;
480
+
481
+ for (size_t i = 0; i < s->dim; ++i) {
482
+ coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
483
+ temp_pos = temp_pos % s->stride[i];
484
+ }
485
+
351
486
  }
352
487
 
353
488
  /*
@@ -454,7 +589,12 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
454
589
  lhs->offset[0] = rhs->offset[1];
455
590
  lhs->offset[1] = rhs->offset[0];
456
591
 
457
- nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
592
+ if (rhs_base->src == rhs_base) {
593
+ nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
594
+ } else {
595
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
596
+ ttable[lhs->dtype][rhs->dtype](rhs, lhs);
597
+ }
458
598
 
459
599
  return (STORAGE*)lhs;
460
600
  }
@@ -467,6 +607,25 @@ namespace nm { namespace dense_storage {
467
607
  // Templated Functions //
468
608
  /////////////////////////
469
609
 
610
+ template<typename LDType, typename RDType>
611
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
612
+
613
+ LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
614
+ RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
615
+
616
+ size_t count = nm_storage_count_max_elements(lhs);
617
+ size_t* temp_coords = ALLOCA_N(size_t, lhs->dim);
618
+ size_t coord_swap_temp;
619
+
620
+ while (count-- > 0) {
621
+ nm_dense_storage_coords(lhs, count, temp_coords);
622
+ NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
623
+ size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
624
+ lhs_els[count] = rhs_els[r_coord];
625
+ }
626
+
627
+ }
628
+
470
629
  template <typename LDType, typename RDType>
471
630
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
472
631
  size_t count = nm_storage_count_max_elements(rhs);
@@ -584,8 +743,12 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
584
743
  template <ewop_t op, typename LDType, typename RDType>
585
744
  static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
586
745
  unsigned int count;
587
-
588
- size_t* new_shape = (size_t*)calloc(left->dim, sizeof(size_t));
746
+ size_t l_count;
747
+ size_t r_count;
748
+
749
+ size_t* temp_coords = ALLOCA_N(size_t, left->dim);
750
+
751
+ size_t* new_shape = ALLOC_N(size_t, left->dim);
589
752
  memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
590
753
 
591
754
  // Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
@@ -602,36 +765,44 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
602
765
  if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
603
766
 
604
767
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
605
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[count], r_elems[count]);
768
+ nm_dense_storage_coords(result, count, temp_coords);
769
+ l_count = nm_dense_storage_pos(left, temp_coords);
770
+ r_count = nm_dense_storage_pos(right, temp_coords);
771
+
772
+ reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
606
773
  }
607
774
 
608
775
  } else { // new_dtype is BYTE: comparison operators
609
776
  uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
610
777
 
611
778
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
779
+ nm_dense_storage_coords(result, count, temp_coords);
780
+ l_count = nm_dense_storage_pos(left, temp_coords);
781
+ r_count = nm_dense_storage_pos(right, temp_coords);
782
+
612
783
  switch (op) {
613
784
  case EW_EQEQ:
614
- res_elems[count] = l_elems[count] == r_elems[count];
785
+ res_elems[count] = l_elems[l_count] == r_elems[r_count];
615
786
  break;
616
787
 
617
788
  case EW_NEQ:
618
- res_elems[count] = l_elems[count] != r_elems[count];
789
+ res_elems[count] = l_elems[l_count] != r_elems[r_count];
619
790
  break;
620
791
 
621
792
  case EW_LT:
622
- res_elems[count] = l_elems[count] < r_elems[count];
793
+ res_elems[count] = l_elems[l_count] < r_elems[r_count];
623
794
  break;
624
795
 
625
796
  case EW_GT:
626
- res_elems[count] = l_elems[count] > r_elems[count];
797
+ res_elems[count] = l_elems[l_count] > r_elems[r_count];
627
798
  break;
628
799
 
629
800
  case EW_LEQ:
630
- res_elems[count] = l_elems[count] <= r_elems[count];
801
+ res_elems[count] = l_elems[l_count] <= r_elems[r_count];
631
802
  break;
632
803
 
633
804
  case EW_GEQ:
634
- res_elems[count] = l_elems[count] >= r_elems[count];
805
+ res_elems[count] = l_elems[l_count] >= r_elems[r_count];
635
806
  break;
636
807
 
637
808
  default:
@@ -646,36 +817,42 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
646
817
  if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
647
818
 
648
819
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
649
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[count], *r_elem);
820
+ nm_dense_storage_coords(result, count, temp_coords);
821
+ l_count = nm_dense_storage_pos(left, temp_coords);
822
+
823
+ reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
650
824
  }
651
825
 
652
826
  } else {
653
827
  uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
654
828
 
655
829
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
830
+ nm_dense_storage_coords(result, count, temp_coords);
831
+ l_count = nm_dense_storage_pos(left, temp_coords);
832
+
656
833
  switch (op) {
657
834
  case EW_EQEQ:
658
- res_elems[count] = l_elems[count] == *r_elem;
835
+ res_elems[count] = l_elems[l_count] == *r_elem;
659
836
  break;
660
837
 
661
838
  case EW_NEQ:
662
- res_elems[count] = l_elems[count] != *r_elem;
839
+ res_elems[count] = l_elems[l_count] != *r_elem;
663
840
  break;
664
841
 
665
842
  case EW_LT:
666
- res_elems[count] = l_elems[count] < *r_elem;
843
+ res_elems[count] = l_elems[l_count] < *r_elem;
667
844
  break;
668
845
 
669
846
  case EW_GT:
670
- res_elems[count] = l_elems[count] > *r_elem;
847
+ res_elems[count] = l_elems[l_count] > *r_elem;
671
848
  break;
672
849
 
673
850
  case EW_LEQ:
674
- res_elems[count] = l_elems[count] <= *r_elem;
851
+ res_elems[count] = l_elems[l_count] <= *r_elem;
675
852
  break;
676
853
 
677
854
  case EW_GEQ:
678
- res_elems[count] = l_elems[count] >= *r_elem;
855
+ res_elems[count] = l_elems[l_count] >= *r_elem;
679
856
  break;
680
857
 
681
858
  default:
@@ -685,7 +862,6 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
685
862
 
686
863
  }
687
864
  }
688
-
689
865
  return result;
690
866
  }
691
867
 
@@ -707,7 +883,6 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
707
883
  *pAlpha = 1;
708
884
  *pBeta = 0;
709
885
  // Do the multiplication
710
-
711
886
  if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
712
887
  reinterpret_cast<DType*>(left->elements), left->shape[1],
713
888
  reinterpret_cast<DType*>(right->elements), 1, pBeta,