nmatrix 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.gitignore +3 -0
  2. data/CONTRIBUTING.md +66 -0
  3. data/Gemfile +1 -1
  4. data/History.txt +68 -10
  5. data/LICENSE.txt +2 -2
  6. data/Manifest.txt +2 -0
  7. data/README.rdoc +90 -69
  8. data/Rakefile +18 -9
  9. data/ext/nmatrix/data/complex.h +7 -7
  10. data/ext/nmatrix/data/data.cpp +2 -7
  11. data/ext/nmatrix/data/data.h +7 -4
  12. data/ext/nmatrix/data/rational.h +2 -2
  13. data/ext/nmatrix/data/ruby_object.h +3 -10
  14. data/ext/nmatrix/extconf.rb +79 -54
  15. data/ext/nmatrix/new_extconf.rb +11 -12
  16. data/ext/nmatrix/nmatrix.cpp +94 -125
  17. data/ext/nmatrix/nmatrix.h +38 -17
  18. data/ext/nmatrix/ruby_constants.cpp +2 -15
  19. data/ext/nmatrix/ruby_constants.h +2 -14
  20. data/ext/nmatrix/storage/common.cpp +2 -2
  21. data/ext/nmatrix/storage/common.h +2 -2
  22. data/ext/nmatrix/storage/dense.cpp +206 -31
  23. data/ext/nmatrix/storage/dense.h +5 -2
  24. data/ext/nmatrix/storage/list.cpp +52 -4
  25. data/ext/nmatrix/storage/list.h +3 -2
  26. data/ext/nmatrix/storage/storage.cpp +6 -6
  27. data/ext/nmatrix/storage/storage.h +2 -2
  28. data/ext/nmatrix/storage/yale.cpp +202 -49
  29. data/ext/nmatrix/storage/yale.h +5 -4
  30. data/ext/nmatrix/ttable_helper.rb +108 -108
  31. data/ext/nmatrix/types.h +2 -15
  32. data/ext/nmatrix/util/io.cpp +2 -2
  33. data/ext/nmatrix/util/io.h +2 -2
  34. data/ext/nmatrix/util/lapack.h +2 -2
  35. data/ext/nmatrix/util/math.cpp +14 -14
  36. data/ext/nmatrix/util/math.h +2 -2
  37. data/ext/nmatrix/util/sl_list.cpp +2 -2
  38. data/ext/nmatrix/util/sl_list.h +2 -2
  39. data/ext/nmatrix/util/util.h +2 -2
  40. data/lib/nmatrix.rb +13 -35
  41. data/lib/nmatrix/blas.rb +182 -56
  42. data/lib/nmatrix/io/market.rb +38 -14
  43. data/lib/nmatrix/io/mat5_reader.rb +393 -278
  44. data/lib/nmatrix/io/mat_reader.rb +121 -107
  45. data/lib/nmatrix/lapack.rb +59 -14
  46. data/lib/nmatrix/monkeys.rb +32 -30
  47. data/lib/nmatrix/nmatrix.rb +204 -100
  48. data/lib/nmatrix/nvector.rb +166 -57
  49. data/lib/nmatrix/shortcuts.rb +364 -231
  50. data/lib/nmatrix/version.rb +8 -4
  51. data/nmatrix.gemspec +5 -3
  52. data/scripts/mac-brew-gcc.sh +1 -1
  53. data/spec/blas_spec.rb +80 -2
  54. data/spec/math_spec.rb +78 -32
  55. data/spec/nmatrix_list_spec.rb +55 -55
  56. data/spec/nmatrix_spec.rb +60 -117
  57. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  58. data/spec/nmatrix_yale_spec.rb +214 -198
  59. data/spec/nvector_spec.rb +58 -2
  60. data/spec/shortcuts_spec.rb +156 -32
  61. data/spec/slice_spec.rb +229 -178
  62. data/spec/spec_helper.rb +2 -2
  63. metadata +71 -21
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -42,8 +42,6 @@
42
42
  #include <string.h>
43
43
  #endif
44
44
 
45
-
46
-
47
45
  #ifdef BENCHMARK
48
46
  // SOURCE: http://stackoverflow.com/questions/2349776/how-can-i-benchmark-a-c-program-easily
49
47
  #ifdef __cplusplus
@@ -55,16 +53,10 @@
55
53
  #endif
56
54
  #endif
57
55
 
58
- /*
59
- * Project Includes
60
- */
61
-
62
-
63
56
  /*
64
57
  * Macros
65
58
  */
66
59
 
67
-
68
60
  #define RUBY_ZERO INT2FIX(0)
69
61
 
70
62
  #ifndef SIZEOF_INT
@@ -112,6 +104,33 @@
112
104
 
113
105
  #ifdef __cplusplus /* These are the C++ versions of the macros. */
114
106
 
107
+ /*
108
+ * If no block is given, return an enumerator. This is copied straight out of Ruby's include/ruby/intern.h.
109
+ *
110
+ * rb_enumeratorize is located in enumerator.c.
111
+ *
112
+ * VALUE rb_enumeratorize(VALUE obj, VALUE meth, int argc, VALUE *argv) {
113
+ * return enumerator_init(enumerator_allocate(rb_cEnumerator), obj, meth, argc, argv);
114
+ * }
115
+ */
116
+ #ifdef RUBY_2
117
+ #ifndef RETURN_SIZED_ENUMERATOR
118
+ #undef RETURN_SIZED_ENUMERATOR
119
+ // Ruby 2.0 and higher has rb_enumeratorize_with_size instead of rb_enumeratorize.
120
+ // We want to support both in the simplest way possible.
121
+ #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \
122
+ if (!rb_block_given_p()) \
123
+ return rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), (argc), (argv), (size_fn)); \
124
+ } while (0)
125
+ #endif
126
+ #else
127
+ #undef RETURN_SIZED_ENUMERATOR
128
+ #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do { \
129
+ if (!rb_block_given_p()) \
130
+ return rb_enumeratorize((obj), ID2SYM(rb_frame_this_func()), (argc), (argv)); \
131
+ } while (0)
132
+ #endif
133
+
115
134
  #define NM_DECL_ENUM(enum_type, name) nm::enum_type name
116
135
  #define NM_DECL_STRUCT(type, name) type name;
117
136
 
@@ -144,7 +163,7 @@
144
163
  #else /* These are the C versions of the macros. */
145
164
 
146
165
  #define NM_DECL_ENUM(enum_type, name) nm_ ## enum_type name
147
- #define NM_DECL_STRUCT(type, name) NM_ ## type name;
166
+ #define NM_DECL_STRUCT(type, name) struct NM_ ## type name;
148
167
 
149
168
  #define NM_DEF_STORAGE_ELEMENTS \
150
169
  NM_DECL_ENUM(dtype_t, dtype); \
@@ -313,28 +332,30 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
313
332
  (rb_obj_is_kind_of(obj, cNVector) == Qtrue)
314
333
 
315
334
 
335
+ #ifdef __cplusplus
316
336
  typedef VALUE (*METHOD)(...);
317
337
 
318
- #ifdef __cplusplus
319
338
  //}; // end of namespace nm
320
339
  #endif
321
340
 
322
- /*
323
- * Data
324
- */
325
-
326
341
  /*
327
342
  * Functions
328
343
  */
329
344
 
345
+ #ifdef __cplusplus
330
346
  extern "C" {
347
+ #endif
348
+
331
349
  void Init_nmatrix();
332
-
350
+
333
351
  // External API
334
352
  VALUE rb_nmatrix_dense_create(NM_DECL_ENUM(dtype_t, dtype), size_t* shape, size_t dim, void* elements, size_t length);
335
353
  VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
336
354
 
337
355
  NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
356
+
357
+ #ifdef __cplusplus
338
358
  }
359
+ #endif
339
360
 
340
361
  #endif // NMATRIX_H
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -31,14 +31,6 @@
31
31
 
32
32
  #include <ruby.h>
33
33
 
34
- /*
35
- * Project Includes
36
- */
37
-
38
- /*
39
- * Macros
40
- */
41
-
42
34
  /*
43
35
  * Global Variables
44
36
  */
@@ -89,10 +81,6 @@ VALUE cNMatrix,
89
81
  nm_eDataTypeError,
90
82
  nm_eStorageTypeError;
91
83
 
92
- /*
93
- * Forward Declarations
94
- */
95
-
96
84
  /*
97
85
  * Functions
98
86
  */
@@ -135,4 +123,3 @@ void nm_init_ruby_constants(void) {
135
123
  nm_rb_unit = rb_intern("unit");
136
124
  nm_rb_nonunit = rb_intern("nonunit");
137
125
  }
138
-
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -34,18 +34,6 @@
34
34
 
35
35
  #include <ruby.h>
36
36
 
37
- /*
38
- * Project Includes
39
- */
40
-
41
- /*
42
- * Macros
43
- */
44
-
45
- /*
46
- * Types
47
- */
48
-
49
37
  /*
50
38
  * Data
51
39
  */
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -9,8 +9,8 @@
9
9
  //
10
10
  // == Copyright Information
11
11
  //
12
- // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2012, Ruby Science Foundation
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
14
  //
15
15
  // Please see LICENSE.txt for additional copyright notices.
16
16
  //
@@ -55,6 +55,9 @@
55
55
 
56
56
  namespace nm { namespace dense_storage {
57
57
 
58
+ template<typename LDType, typename RDType>
59
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
60
+
58
61
  template <typename LDType, typename RDType>
59
62
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
60
63
 
@@ -89,13 +92,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
89
92
  // Lifecycle //
90
93
  ///////////////
91
94
 
95
+
92
96
  /*
93
- * Note that elements and elements_length are for initial value(s) passed in.
94
- * If they are the correct length, they will be used directly. If not, they
95
- * will be concatenated over and over again into a new elements array. If
96
- * elements is NULL, the new elements array will not be initialized.
97
+ * This creates a dummy with all the properties of dense storage, but no actual elements allocation.
98
+ *
99
+ * elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
100
+ * check for that NULL pointer before freeing elements.
97
101
  */
98
- DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
102
+ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
99
103
  DENSE_STORAGE* s = ALLOC( DENSE_STORAGE );
100
104
 
101
105
  s->dim = dim;
@@ -108,12 +112,27 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
108
112
  s->stride = stride(shape, dim);
109
113
  s->count = 1;
110
114
  s->src = s;
111
-
112
- size_t count = nm_storage_count_max_elements(s);
115
+
116
+ s->elements = NULL;
117
+
118
+ return s;
119
+ }
120
+
121
+
122
+ /*
123
+ * Note that elements and elements_length are for initial value(s) passed in.
124
+ * If they are the correct length, they will be used directly. If not, they
125
+ * will be concatenated over and over again into a new elements array. If
126
+ * elements is NULL, the new elements array will not be initialized.
127
+ */
128
+ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
129
+
130
+ DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
131
+ size_t count = nm_storage_count_max_elements(s);
113
132
 
114
133
  if (elements_length == count) {
115
134
  s->elements = elements;
116
-
135
+
117
136
  } else {
118
137
  s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
119
138
 
@@ -138,8 +157,9 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
138
157
  return s;
139
158
  }
140
159
 
160
+
141
161
  /*
142
- * Destructor for dense storage
162
+ * Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
143
163
  */
144
164
  void nm_dense_storage_delete(STORAGE* s) {
145
165
  // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
@@ -149,7 +169,8 @@ void nm_dense_storage_delete(STORAGE* s) {
149
169
  free(storage->shape);
150
170
  free(storage->offset);
151
171
  free(storage->stride);
152
- free(storage->elements);
172
+ if (storage->elements != NULL) // happens with dummy objects
173
+ free(storage->elements);
153
174
  free(storage);
154
175
  }
155
176
  }
@@ -188,6 +209,101 @@ void nm_dense_storage_mark(void* storage_base) {
188
209
  // Accessors //
189
210
  ///////////////
190
211
 
212
+
213
+ // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
214
+ // the matrix's storage.
215
+ static VALUE nm_dense_enumerator_length(VALUE nmatrix) {
216
+ long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
217
+ return LONG2NUM(len);
218
+ }
219
+
220
+
221
+ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
222
+ volatile VALUE nm = nmatrix;
223
+
224
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
225
+
226
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_dense_enumerator_length); // fourth argument only used by Ruby2+
227
+
228
+ // Create indices and initialize them to zero
229
+ size_t* coords = ALLOCA_N(size_t, s->dim);
230
+ memset(coords, 0, sizeof(size_t) * s->dim);
231
+
232
+ size_t slice_index;
233
+ size_t* shape_copy = ALLOC_N(size_t, s->dim);
234
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
235
+
236
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
237
+
238
+ for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
239
+ nm_dense_storage_coords(sliced_dummy, k, coords);
240
+ slice_index = nm_dense_storage_pos(s, coords);
241
+ VALUE ary = rb_ary_new();
242
+ if (NM_DTYPE(nm) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
243
+ else rb_ary_push(ary, rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval);
244
+
245
+ for (size_t p = 0; p < s->dim; ++p) {
246
+ rb_ary_push(ary, INT2FIX(coords[p]));
247
+ }
248
+
249
+ // yield the array which now consists of the value and the indices
250
+ rb_yield(ary);
251
+
252
+ }
253
+
254
+ nm_dense_storage_delete(sliced_dummy);
255
+
256
+ return Qnil;
257
+
258
+ }
259
+
260
+
261
+ /*
262
+ * Borrowed this function from NArray. Handles 'each' iteration on a dense
263
+ * matrix.
264
+ *
265
+ * Additionally, handles separately matrices containing VALUEs and matrices
266
+ * containing other types of data.
267
+ */
268
+ VALUE nm_dense_each(VALUE nmatrix) {
269
+ volatile VALUE nm = nmatrix; // Not sure this actually does anything.
270
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
271
+
272
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_storage_count_max_elements(s));
273
+
274
+ size_t* temp_coords = ALLOCA_N(size_t, s->dim);
275
+ size_t sliced_index;
276
+ size_t* shape_copy = ALLOC_N(size_t, s->dim);
277
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
278
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
279
+
280
+ if (NM_DTYPE(nm) == nm::RUBYOBJ) {
281
+
282
+ // matrix of Ruby objects -- yield those objects directly
283
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
284
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
285
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
286
+ rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
287
+
288
+ } else {
289
+
290
+ // We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way the user can't accidentally
291
+ // modify it and cause a seg fault.
292
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
293
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
294
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
295
+ VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval;
296
+ rb_yield( v ); // yield to the copy we made
297
+ }
298
+ }
299
+
300
+ nm_dense_storage_delete(sliced_dummy);
301
+
302
+ return Qnil;
303
+
304
+ }
305
+
306
+
191
307
  /*
192
308
  * Get a slice or one element, using copying.
193
309
  *
@@ -348,6 +464,25 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
348
464
  pos += (coords[i] + s->offset[i]) * s->stride[i];
349
465
 
350
466
  return pos;
467
+
468
+ }
469
+
470
+ /*
471
+ * Determine the set of slice coordinates from linear array position (in elements
472
+ * of s) of some set of coordinates (given by slice). (Inverse of
473
+ * nm_dense_storage_pos).
474
+ *
475
+ * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
476
+ */
477
+ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {
478
+
479
+ size_t temp_pos = slice_pos;
480
+
481
+ for (size_t i = 0; i < s->dim; ++i) {
482
+ coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
483
+ temp_pos = temp_pos % s->stride[i];
484
+ }
485
+
351
486
  }
352
487
 
353
488
  /*
@@ -454,7 +589,12 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
454
589
  lhs->offset[0] = rhs->offset[1];
455
590
  lhs->offset[1] = rhs->offset[0];
456
591
 
457
- nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
592
+ if (rhs_base->src == rhs_base) {
593
+ nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
594
+ } else {
595
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
596
+ ttable[lhs->dtype][rhs->dtype](rhs, lhs);
597
+ }
458
598
 
459
599
  return (STORAGE*)lhs;
460
600
  }
@@ -467,6 +607,25 @@ namespace nm { namespace dense_storage {
467
607
  // Templated Functions //
468
608
  /////////////////////////
469
609
 
610
+ template<typename LDType, typename RDType>
611
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
612
+
613
+ LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
614
+ RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
615
+
616
+ size_t count = nm_storage_count_max_elements(lhs);
617
+ size_t* temp_coords = ALLOCA_N(size_t, lhs->dim);
618
+ size_t coord_swap_temp;
619
+
620
+ while (count-- > 0) {
621
+ nm_dense_storage_coords(lhs, count, temp_coords);
622
+ NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
623
+ size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
624
+ lhs_els[count] = rhs_els[r_coord];
625
+ }
626
+
627
+ }
628
+
470
629
  template <typename LDType, typename RDType>
471
630
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
472
631
  size_t count = nm_storage_count_max_elements(rhs);
@@ -584,8 +743,12 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
584
743
  template <ewop_t op, typename LDType, typename RDType>
585
744
  static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
586
745
  unsigned int count;
587
-
588
- size_t* new_shape = (size_t*)calloc(left->dim, sizeof(size_t));
746
+ size_t l_count;
747
+ size_t r_count;
748
+
749
+ size_t* temp_coords = ALLOCA_N(size_t, left->dim);
750
+
751
+ size_t* new_shape = ALLOC_N(size_t, left->dim);
589
752
  memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
590
753
 
591
754
  // Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
@@ -602,36 +765,44 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
602
765
  if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
603
766
 
604
767
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
605
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[count], r_elems[count]);
768
+ nm_dense_storage_coords(result, count, temp_coords);
769
+ l_count = nm_dense_storage_pos(left, temp_coords);
770
+ r_count = nm_dense_storage_pos(right, temp_coords);
771
+
772
+ reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
606
773
  }
607
774
 
608
775
  } else { // new_dtype is BYTE: comparison operators
609
776
  uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
610
777
 
611
778
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
779
+ nm_dense_storage_coords(result, count, temp_coords);
780
+ l_count = nm_dense_storage_pos(left, temp_coords);
781
+ r_count = nm_dense_storage_pos(right, temp_coords);
782
+
612
783
  switch (op) {
613
784
  case EW_EQEQ:
614
- res_elems[count] = l_elems[count] == r_elems[count];
785
+ res_elems[count] = l_elems[l_count] == r_elems[r_count];
615
786
  break;
616
787
 
617
788
  case EW_NEQ:
618
- res_elems[count] = l_elems[count] != r_elems[count];
789
+ res_elems[count] = l_elems[l_count] != r_elems[r_count];
619
790
  break;
620
791
 
621
792
  case EW_LT:
622
- res_elems[count] = l_elems[count] < r_elems[count];
793
+ res_elems[count] = l_elems[l_count] < r_elems[r_count];
623
794
  break;
624
795
 
625
796
  case EW_GT:
626
- res_elems[count] = l_elems[count] > r_elems[count];
797
+ res_elems[count] = l_elems[l_count] > r_elems[r_count];
627
798
  break;
628
799
 
629
800
  case EW_LEQ:
630
- res_elems[count] = l_elems[count] <= r_elems[count];
801
+ res_elems[count] = l_elems[l_count] <= r_elems[r_count];
631
802
  break;
632
803
 
633
804
  case EW_GEQ:
634
- res_elems[count] = l_elems[count] >= r_elems[count];
805
+ res_elems[count] = l_elems[l_count] >= r_elems[r_count];
635
806
  break;
636
807
 
637
808
  default:
@@ -646,36 +817,42 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
646
817
  if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
647
818
 
648
819
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
649
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[count], *r_elem);
820
+ nm_dense_storage_coords(result, count, temp_coords);
821
+ l_count = nm_dense_storage_pos(left, temp_coords);
822
+
823
+ reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
650
824
  }
651
825
 
652
826
  } else {
653
827
  uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
654
828
 
655
829
  for (count = nm_storage_count_max_elements(result); count-- > 0;) {
830
+ nm_dense_storage_coords(result, count, temp_coords);
831
+ l_count = nm_dense_storage_pos(left, temp_coords);
832
+
656
833
  switch (op) {
657
834
  case EW_EQEQ:
658
- res_elems[count] = l_elems[count] == *r_elem;
835
+ res_elems[count] = l_elems[l_count] == *r_elem;
659
836
  break;
660
837
 
661
838
  case EW_NEQ:
662
- res_elems[count] = l_elems[count] != *r_elem;
839
+ res_elems[count] = l_elems[l_count] != *r_elem;
663
840
  break;
664
841
 
665
842
  case EW_LT:
666
- res_elems[count] = l_elems[count] < *r_elem;
843
+ res_elems[count] = l_elems[l_count] < *r_elem;
667
844
  break;
668
845
 
669
846
  case EW_GT:
670
- res_elems[count] = l_elems[count] > *r_elem;
847
+ res_elems[count] = l_elems[l_count] > *r_elem;
671
848
  break;
672
849
 
673
850
  case EW_LEQ:
674
- res_elems[count] = l_elems[count] <= *r_elem;
851
+ res_elems[count] = l_elems[l_count] <= *r_elem;
675
852
  break;
676
853
 
677
854
  case EW_GEQ:
678
- res_elems[count] = l_elems[count] >= *r_elem;
855
+ res_elems[count] = l_elems[l_count] >= *r_elem;
679
856
  break;
680
857
 
681
858
  default:
@@ -685,7 +862,6 @@ static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* righ
685
862
 
686
863
  }
687
864
  }
688
-
689
865
  return result;
690
866
  }
691
867
 
@@ -707,7 +883,6 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
707
883
  *pAlpha = 1;
708
884
  *pBeta = 0;
709
885
  // Do the multiplication
710
-
711
886
  if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
712
887
  reinterpret_cast<DType*>(left->elements), left->shape[1],
713
888
  reinterpret_cast<DType*>(right->elements), 1, pBeta,