nmatrix 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. checksums.yaml +4 -4
  2. data/History.txt +102 -10
  3. data/README.rdoc +24 -32
  4. data/Rakefile +1 -1
  5. data/ext/nmatrix/data/complex.h +9 -0
  6. data/ext/nmatrix/data/data.cpp +78 -4
  7. data/ext/nmatrix/data/data.h +86 -54
  8. data/ext/nmatrix/data/rational.h +2 -0
  9. data/ext/nmatrix/data/ruby_object.h +38 -8
  10. data/ext/nmatrix/extconf.rb +13 -7
  11. data/ext/nmatrix/nmatrix.cpp +262 -139
  12. data/ext/nmatrix/nmatrix.h +11 -4
  13. data/ext/nmatrix/storage/common.cpp +20 -13
  14. data/ext/nmatrix/storage/common.h +18 -12
  15. data/ext/nmatrix/storage/dense.cpp +122 -192
  16. data/ext/nmatrix/storage/dense.h +4 -2
  17. data/ext/nmatrix/storage/list.cpp +467 -636
  18. data/ext/nmatrix/storage/list.h +6 -3
  19. data/ext/nmatrix/storage/storage.cpp +83 -46
  20. data/ext/nmatrix/storage/storage.h +7 -7
  21. data/ext/nmatrix/storage/yale.cpp +621 -361
  22. data/ext/nmatrix/storage/yale.h +21 -9
  23. data/ext/nmatrix/ttable_helper.rb +27 -31
  24. data/ext/nmatrix/types.h +1 -1
  25. data/ext/nmatrix/util/math.cpp +9 -10
  26. data/ext/nmatrix/util/sl_list.cpp +1 -7
  27. data/ext/nmatrix/util/sl_list.h +0 -118
  28. data/lib/nmatrix/blas.rb +59 -18
  29. data/lib/nmatrix/monkeys.rb +0 -52
  30. data/lib/nmatrix/nmatrix.rb +136 -9
  31. data/lib/nmatrix/nvector.rb +33 -0
  32. data/lib/nmatrix/shortcuts.rb +95 -16
  33. data/lib/nmatrix/version.rb +1 -1
  34. data/lib/nmatrix/yale_functions.rb +25 -19
  35. data/spec/blas_spec.rb +1 -19
  36. data/spec/elementwise_spec.rb +132 -17
  37. data/spec/lapack_spec.rb +0 -3
  38. data/spec/nmatrix_list_spec.rb +18 -0
  39. data/spec/nmatrix_spec.rb +44 -18
  40. data/spec/nmatrix_yale_spec.rb +1 -3
  41. data/spec/shortcuts_spec.rb +26 -36
  42. data/spec/slice_spec.rb +2 -4
  43. metadata +2 -2
@@ -136,10 +136,10 @@
136
136
 
137
137
  #define NM_DEF_STORAGE_ELEMENTS \
138
138
  NM_DECL_ENUM(dtype_t, dtype); \
139
- size_t dim; \
140
- size_t* shape; \
141
- size_t* offset; \
142
- int count; \
139
+ size_t dim; \
140
+ size_t* shape; \
141
+ size_t* offset; \
142
+ int count; \
143
143
  STORAGE* src;
144
144
 
145
145
  #define NM_DEF_STORAGE_CHILD_STRUCT_PRE(name) struct name : STORAGE {
@@ -312,6 +312,7 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
312
312
  #define NM_SHAPE(val,i) (NM_STORAGE(val)->shape[(i)])
313
313
  #define NM_SHAPE0(val) (NM_STORAGE(val)->shape[0])
314
314
  #define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
315
+ #define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
315
316
 
316
317
  #define NM_DENSE_COUNT(val) (storage_count_max_elements(NM_STORAGE_DENSE(val)))
317
318
  #define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
@@ -355,6 +356,12 @@ extern "C" {
355
356
  VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
356
357
 
357
358
  NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
359
+ NM_DECL_ENUM(dtype_t, nm_dtype_min(VALUE));
360
+
361
+ // Non-API functions needed by other cpp files.
362
+ NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage);
363
+ void nm_delete(NMATRIX* mat);
364
+ void nm_delete_ref(NMATRIX* mat);
358
365
 
359
366
  #ifdef __cplusplus
360
367
  }
@@ -52,19 +52,26 @@
52
52
  */
53
53
 
54
54
  extern "C" {
55
- /*
56
- * Calculate the number of elements in the dense storage structure, based on
57
- * shape and dim.
58
- */
59
- size_t nm_storage_count_max_elements(const STORAGE* storage) {
60
- unsigned int i;
61
- size_t count = 1;
62
-
63
- for (i = storage->dim; i-- > 0;) {
64
- count *= storage->shape[i];
55
+ /*
56
+ * Calculate the number of elements in the dense storage structure, based on
57
+ * shape and dim.
58
+ */
59
+ size_t nm_storage_count_max_elements(const STORAGE* storage) {
60
+ unsigned int i;
61
+ size_t count = 1;
62
+
63
+ for (i = storage->dim; i-- > 0;) {
64
+ count *= storage->shape[i];
65
+ }
66
+
67
+ return count;
68
+ }
69
+
70
+ // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
71
+ // the matrix's storage.
72
+ VALUE nm_enumerator_length(VALUE nmatrix) {
73
+ long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
74
+ return LONG2NUM(len);
65
75
  }
66
-
67
- return count;
68
- }
69
76
 
70
77
  } // end of extern "C" block
@@ -32,6 +32,8 @@
32
32
  * Standard Includes
33
33
  */
34
34
 
35
+ #include <cmath> // pow().
36
+
35
37
  /*
36
38
  * Project Includes
37
39
  */
@@ -41,7 +43,7 @@
41
43
  /*
42
44
  * Macros
43
45
  */
44
-
46
+
45
47
  extern "C" {
46
48
 
47
49
  /*
@@ -69,6 +71,7 @@ struct SLICE {
69
71
  */
70
72
 
71
73
  size_t nm_storage_count_max_elements(const STORAGE* storage);
74
+ VALUE nm_enumerator_length(VALUE nmatrix);
72
75
 
73
76
  } // end of extern "C" block
74
77
 
@@ -78,19 +81,22 @@ namespace nm {
78
81
  * Templated helper function for element-wise operations, used by dense, yale, and list.
79
82
  */
80
83
  template <ewop_t op, typename LDType, typename RDType>
81
- inline LDType ew_op_switch(LDType left, RDType right) {
84
+ inline VALUE ew_op_switch(LDType left, RDType right) {
82
85
  switch (op) {
83
86
  case EW_ADD:
84
- return left + right;
87
+ return RubyObject(left + right).rval;
85
88
 
86
89
  case EW_SUB:
87
- return left - right;
90
+ return RubyObject(left - right).rval;
88
91
 
89
92
  case EW_MUL:
90
- return left * right;
93
+ return RubyObject(left * right).rval;
91
94
 
92
95
  case EW_DIV:
93
- return left / right;
96
+ return RubyObject(left / right).rval;
97
+
98
+ case EW_POW:
99
+ return RubyObject(pow(left, right)).rval;
94
100
 
95
101
  case EW_MOD:
96
102
  rb_raise(rb_eNotImpError, "Element-wise modulo is currently not supported.");
@@ -99,11 +105,11 @@ namespace nm {
99
105
  default:
100
106
  rb_raise(rb_eStandardError, "This should not happen.");
101
107
  }
102
- return 0;
108
+ return Qnil;
103
109
  }
104
110
 
105
111
  #define EWOP_INT_INT_DIV(ltype, rtype) template <> \
106
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
112
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
107
113
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
108
114
  if ((left > 0 && right > 0) || (left < 0 && right < 0)) \
109
115
  return left / right; \
@@ -112,27 +118,27 @@ namespace nm {
112
118
  }
113
119
 
114
120
  #define EWOP_UINT_UINT_DIV(ltype, rtype) template <> \
115
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
121
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
116
122
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
117
123
  return left / right; \
118
124
  }
119
125
 
120
126
  #define EWOP_INT_UINT_DIV(ltype, rtype) template <> \
121
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
127
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
122
128
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
123
129
  if (left > 0 ) return left / right; \
124
130
  else return ( ltype )(std::floor((double)(left) / (double)(right))); \
125
131
  }
126
132
 
127
133
  #define EWOP_UINT_INT_DIV(ltype, rtype) template <> \
128
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
134
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
129
135
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
130
136
  if (right > 0) return left / right; \
131
137
  else return ( ltype )(std::floor((double)(left) / (double)(right))); \
132
138
  }
133
139
 
134
140
  #define EWOP_FLOAT_INT_DIV(ltype, rtype) template <> \
135
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
141
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
136
142
  return left / (ltype)(right); \
137
143
  }
138
144
 
@@ -60,12 +60,9 @@ namespace nm { namespace dense_storage {
60
60
 
61
61
  template <typename LDType, typename RDType>
62
62
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
63
-
63
+
64
64
  template <typename LDType, typename RDType>
65
65
  bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
66
-
67
- template <ewop_t op, typename LDType, typename RDType>
68
- static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar);
69
66
 
70
67
  template <typename DType>
71
68
  static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
@@ -132,7 +129,7 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
132
129
 
133
130
  if (elements_length == count) {
134
131
  s->elements = elements;
135
-
132
+
136
133
  } else {
137
134
  s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
138
135
 
@@ -145,12 +142,12 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
145
142
  if (i + elements_length > count) {
146
143
  copy_length = count - i;
147
144
  }
148
-
145
+
149
146
  memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
150
147
  }
151
148
 
152
149
  // Get rid of the init_val.
153
- free(elements);
150
+ xfree(elements);
154
151
  }
155
152
  }
156
153
 
@@ -210,20 +207,85 @@ void nm_dense_storage_mark(void* storage_base) {
210
207
  ///////////////
211
208
 
212
209
 
213
- // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
214
- // the matrix's storage.
215
- static VALUE nm_dense_enumerator_length(VALUE nmatrix) {
216
- long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
217
- return LONG2NUM(len);
210
+
211
+ /*
212
+ * map_pair iterator for dense matrices (for element-wise operations)
213
+ */
214
+ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
215
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
216
+ *t = NM_STORAGE_DENSE(right);
217
+
218
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
219
+
220
+ size_t* coords = ALLOCA_N(size_t, s->dim);
221
+ memset(coords, 0, sizeof(size_t) * s->dim);
222
+
223
+ size_t *shape_copy = ALLOC_N(size_t, s->dim);
224
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
225
+
226
+ size_t count = nm_storage_count_max_elements(s);
227
+
228
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
229
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
230
+
231
+ for (size_t k = 0; k < count; ++k) {
232
+ nm_dense_storage_coords(result, k, coords);
233
+ size_t s_index = nm_dense_storage_pos(s, coords),
234
+ t_index = nm_dense_storage_pos(t, coords);
235
+
236
+ VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
237
+ VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
238
+
239
+ result_elem[k] = rb_yield_values(2, sval, tval);
240
+ }
241
+
242
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
243
+
244
+ return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
245
+ }
246
+
247
+
248
+ /*
249
+ * map enumerator for dense matrices.
250
+ */
251
+ VALUE nm_dense_map(VALUE self) {
252
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
253
+
254
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
255
+
256
+ size_t* coords = ALLOCA_N(size_t, s->dim);
257
+ memset(coords, 0, sizeof(size_t) * s->dim);
258
+
259
+ size_t *shape_copy = ALLOC_N(size_t, s->dim);
260
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
261
+
262
+ size_t count = nm_storage_count_max_elements(s);
263
+
264
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
265
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
266
+
267
+ for (size_t k = 0; k < count; ++k) {
268
+ nm_dense_storage_coords(result, k, coords);
269
+ size_t s_index = nm_dense_storage_pos(s, coords);
270
+
271
+ result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
272
+ }
273
+
274
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
275
+
276
+ return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
218
277
  }
219
278
 
220
279
 
280
+ /*
281
+ * each_with_indices iterator for dense matrices.
282
+ */
221
283
  VALUE nm_dense_each_with_indices(VALUE nmatrix) {
222
284
  volatile VALUE nm = nmatrix;
223
285
 
224
286
  DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
225
287
 
226
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_dense_enumerator_length); // fourth argument only used by Ruby2+
288
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
227
289
 
228
290
  // Create indices and initialize them to zero
229
291
  size_t* coords = ALLOCA_N(size_t, s->dim);
@@ -269,7 +331,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
269
331
  volatile VALUE nm = nmatrix; // Not sure this actually does anything.
270
332
  DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
271
333
 
272
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_storage_count_max_elements(s));
334
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length);
273
335
 
274
336
  size_t* temp_coords = ALLOCA_N(size_t, s->dim);
275
337
  size_t sliced_index;
@@ -280,10 +342,11 @@ VALUE nm_dense_each(VALUE nmatrix) {
280
342
  if (NM_DTYPE(nm) == nm::RUBYOBJ) {
281
343
 
282
344
  // matrix of Ruby objects -- yield those objects directly
283
- for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
345
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
284
346
  nm_dense_storage_coords(sliced_dummy, i, temp_coords);
285
347
  sliced_index = nm_dense_storage_pos(s, temp_coords);
286
348
  rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
349
+ }
287
350
 
288
351
  } else {
289
352
 
@@ -300,7 +363,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
300
363
  nm_dense_storage_delete(sliced_dummy);
301
364
 
302
365
  return nmatrix;
303
-
366
+
304
367
  }
305
368
 
306
369
 
@@ -321,13 +384,13 @@ void* nm_dense_storage_get(STORAGE* storage, SLICE* slice) {
321
384
  shape[i] = slice->lengths[i];
322
385
  }
323
386
 
324
- ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
387
+ ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
325
388
 
326
- slice_copy(ns,
327
- reinterpret_cast<const DENSE_STORAGE*>(s->src),
328
- slice->lengths,
329
- 0,
330
- nm_dense_storage_pos(s, slice->coords),
389
+ slice_copy(ns,
390
+ reinterpret_cast<const DENSE_STORAGE*>(s->src),
391
+ slice->lengths,
392
+ 0,
393
+ nm_dense_storage_pos(s, slice->coords),
331
394
  0);
332
395
  return ns;
333
396
  }
@@ -343,7 +406,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
343
406
 
344
407
  if (slice->single)
345
408
  return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
346
-
409
+
347
410
  else {
348
411
  DENSE_STORAGE* ns = ALLOC( DENSE_STORAGE );
349
412
  ns->dim = s->dim;
@@ -358,7 +421,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
358
421
 
359
422
  ns->stride = s->stride;
360
423
  ns->elements = s->elements;
361
-
424
+
362
425
  s->src->count++;
363
426
  ns->src = s->src;
364
427
 
@@ -387,8 +450,13 @@ void nm_dense_storage_set(STORAGE* storage, SLICE* slice, void* val) {
387
450
  * have the same dtype.
388
451
  */
389
452
  bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
390
- LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*);
391
-
453
+ LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
454
+
455
+ if (!ttable[left->dtype][right->dtype]) {
456
+ rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
457
+ return false;
458
+ }
459
+
392
460
  return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
393
461
  }
394
462
 
@@ -399,10 +467,10 @@ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
399
467
  bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
400
468
  if (mat->dtype == nm::COMPLEX64) {
401
469
  return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
402
-
470
+
403
471
  } else if (mat->dtype == nm::COMPLEX128) {
404
472
  return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
405
-
473
+
406
474
  } else {
407
475
  return nm_dense_storage_is_symmetric(mat, lda);
408
476
  }
@@ -413,7 +481,7 @@ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
413
481
  */
414
482
  bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
415
483
  DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
416
-
484
+
417
485
  return ttable[mat->dtype](mat, lda);
418
486
  }
419
487
 
@@ -421,24 +489,6 @@ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
421
489
  // Math //
422
490
  //////////
423
491
 
424
- /*
425
- * Dense matrix-matrix and matrix-scalar element-wise operations.
426
- *
427
- * right or rscalar should be NULL; they should not both be initialized. If right is NULL, it'll use the scalar value instead.
428
- */
429
- STORAGE* nm_dense_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar) {
430
- OP_LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::ew_op, DENSE_STORAGE*, const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void*);
431
-
432
- if (right)
433
- return ttable[op][left->dtype][right->dtype](reinterpret_cast<const DENSE_STORAGE*>(left), reinterpret_cast<const DENSE_STORAGE*>(right), NULL);
434
- else {
435
- nm::dtype_t r_dtype = nm_dtype_guess(scalar);
436
- void* r_scalar = ALLOCA_N(char, DTYPE_SIZES[r_dtype]);
437
- rubyval_to_cval(scalar, r_dtype, r_scalar);
438
-
439
- return ttable[op][left->dtype][r_dtype](reinterpret_cast<const DENSE_STORAGE*>(left), NULL, r_scalar);
440
- }
441
- }
442
492
 
443
493
  /*
444
494
  * Dense matrix-matrix multiplication.
@@ -468,9 +518,9 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
468
518
  }
469
519
 
470
520
  /*
471
- * Determine the a set of slice coordinates from linear array position (in elements
521
+ * Determine the a set of slice coordinates from linear array position (in elements
472
522
  * of s) of some set of coordinates (given by slice). (Inverse of
473
- * nm_dense_storage_pos).
523
+ * nm_dense_storage_pos).
474
524
  *
475
525
  * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
476
526
  */
@@ -510,7 +560,7 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
510
560
  for (size_t i = 0; i < lengths[n]; ++i) {
511
561
  slice_copy(dest, src, lengths,
512
562
  pdest + dest->stride[n]*i,
513
- psrc + src->stride[n]*i,
563
+ psrc + src->stride[n]*i,
514
564
  n + 1);
515
565
  }
516
566
  } else {
@@ -528,9 +578,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
528
578
  /*
529
579
  * Copy dense storage, changing dtype if necessary.
530
580
  */
531
- STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
581
+ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
532
582
  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
533
583
 
584
+ if (!ttable[new_dtype][rhs->dtype]) {
585
+ rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
586
+ return NULL;
587
+ }
588
+
534
589
  return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
535
590
  }
536
591
 
@@ -538,7 +593,7 @@ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
538
593
  * Copy dense storage without a change in dtype.
539
594
  */
540
595
  DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
541
- size_t count = 0;
596
+ size_t count = 0;
542
597
  size_t *shape = ALLOC_N(size_t, rhs->dim);
543
598
 
544
599
  // copy shape and offset
@@ -593,6 +648,10 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
593
648
  nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
594
649
  } else {
595
650
  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
651
+
652
+ if (!ttable[lhs->dtype][rhs->dtype])
653
+ rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
654
+
596
655
  ttable[lhs->dtype][rhs->dtype](rhs, lhs);
597
656
  }
598
657
 
@@ -640,8 +699,7 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
640
699
 
641
700
  // Ensure that allocation worked before copying.
642
701
  if (lhs && count) {
643
- if (rhs->src != rhs) {
644
- /* Make a copy of a ref to a matrix. */
702
+ if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
645
703
 
646
704
  DENSE_STORAGE* tmp = nm_dense_storage_copy(rhs);
647
705
 
@@ -650,13 +708,12 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
650
708
  lhs_els[count] = tmp_els[count];
651
709
  }
652
710
  nm_dense_storage_delete(tmp);
653
- } else {
654
- /* Make a regular copy. */
655
711
 
712
+ } else { // Make a regular copy.
656
713
  while (count-- > 0) lhs_els[count] = rhs_els[count];
657
714
  }
658
715
  }
659
-
716
+
660
717
  return lhs;
661
718
  }
662
719
 
@@ -673,7 +730,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
673
730
  LDType* left_elements = (LDType*)left->elements;
674
731
  RDType* right_elements = (RDType*)right->elements;
675
732
 
676
- // Copy elements in temp matrix if you have refernce to the right.
733
+ // Copy elements in temp matrix if you have reference to the right.
677
734
  if (left->src != left) {
678
735
  tmp1 = nm_dense_storage_copy(left);
679
736
  left_elements = (LDType*)tmp1->elements;
@@ -682,7 +739,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
682
739
  tmp2 = nm_dense_storage_copy(right);
683
740
  right_elements = (RDType*)tmp2->elements;
684
741
  }
685
-
742
+
686
743
 
687
744
 
688
745
  for (index = nm_storage_count_max_elements(left); index-- > 0;) {
@@ -704,20 +761,20 @@ template <typename DType>
704
761
  bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
705
762
  unsigned int i, j;
706
763
  register DType complex_conj;
707
-
764
+
708
765
  const DType* els = (DType*) mat->elements;
709
-
766
+
710
767
  for (i = mat->shape[0]; i-- > 0;) {
711
768
  for (j = i + 1; j < mat->shape[1]; ++j) {
712
769
  complex_conj = els[j*lda + 1];
713
770
  complex_conj.i = -complex_conj.i;
714
-
771
+
715
772
  if (els[i*lda+j] != complex_conj) {
716
773
  return false;
717
774
  }
718
775
  }
719
776
  }
720
-
777
+
721
778
  return true;
722
779
  }
723
780
 
@@ -725,7 +782,7 @@ template <typename DType>
725
782
  bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
726
783
  unsigned int i, j;
727
784
  const DType* els = (DType*) mat->elements;
728
-
785
+
729
786
  for (i = mat->shape[0]; i-- > 0;) {
730
787
  for (j = i + 1; j < mat->shape[1]; ++j) {
731
788
  if (els[i*lda+j] != els[j*lda+i]) {
@@ -733,137 +790,10 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
733
790
  }
734
791
  }
735
792
  }
736
-
793
+
737
794
  return true;
738
795
  }
739
796
 
740
- /*
741
- * Templated dense storage element-wise operations which return the same DType.
742
- */
743
- template <ewop_t op, typename LDType, typename RDType>
744
- static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
745
- unsigned int count;
746
- size_t l_count;
747
- size_t r_count;
748
-
749
- size_t* temp_coords = ALLOCA_N(size_t, left->dim);
750
-
751
- size_t* new_shape = ALLOC_N(size_t, left->dim);
752
- memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
753
-
754
- // Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
755
- // set by the left matrix, but for comparisons, we'll use BYTE (in lieu of boolean).
756
- dtype_t new_dtype = static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS ? left->dtype : BYTE;
757
-
758
- DENSE_STORAGE* result = nm_dense_storage_create(new_dtype, new_shape, left->dim, NULL, 0);
759
-
760
- LDType* l_elems = reinterpret_cast<LDType*>(left->elements);
761
-
762
- if (right) { // matrix-matrix operation
763
- RDType* r_elems = reinterpret_cast<RDType*>(right->elements);
764
-
765
- if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
766
-
767
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
768
- nm_dense_storage_coords(result, count, temp_coords);
769
- l_count = nm_dense_storage_pos(left, temp_coords);
770
- r_count = nm_dense_storage_pos(right, temp_coords);
771
-
772
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
773
- }
774
-
775
- } else { // new_dtype is BYTE: comparison operators
776
- uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
777
-
778
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
779
- nm_dense_storage_coords(result, count, temp_coords);
780
- l_count = nm_dense_storage_pos(left, temp_coords);
781
- r_count = nm_dense_storage_pos(right, temp_coords);
782
-
783
- switch (op) {
784
- case EW_EQEQ:
785
- res_elems[count] = l_elems[l_count] == r_elems[r_count];
786
- break;
787
-
788
- case EW_NEQ:
789
- res_elems[count] = l_elems[l_count] != r_elems[r_count];
790
- break;
791
-
792
- case EW_LT:
793
- res_elems[count] = l_elems[l_count] < r_elems[r_count];
794
- break;
795
-
796
- case EW_GT:
797
- res_elems[count] = l_elems[l_count] > r_elems[r_count];
798
- break;
799
-
800
- case EW_LEQ:
801
- res_elems[count] = l_elems[l_count] <= r_elems[r_count];
802
- break;
803
-
804
- case EW_GEQ:
805
- res_elems[count] = l_elems[l_count] >= r_elems[r_count];
806
- break;
807
-
808
- default:
809
- rb_raise(rb_eStandardError, "this should not happen");
810
- }
811
- }
812
- }
813
-
814
- } else { // matrix-scalar operation
815
- const RDType* r_elem = reinterpret_cast<const RDType*>(rscalar);
816
-
817
- if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
818
-
819
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
820
- nm_dense_storage_coords(result, count, temp_coords);
821
- l_count = nm_dense_storage_pos(left, temp_coords);
822
-
823
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
824
- }
825
-
826
- } else {
827
- uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
828
-
829
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
830
- nm_dense_storage_coords(result, count, temp_coords);
831
- l_count = nm_dense_storage_pos(left, temp_coords);
832
-
833
- switch (op) {
834
- case EW_EQEQ:
835
- res_elems[count] = l_elems[l_count] == *r_elem;
836
- break;
837
-
838
- case EW_NEQ:
839
- res_elems[count] = l_elems[l_count] != *r_elem;
840
- break;
841
-
842
- case EW_LT:
843
- res_elems[count] = l_elems[l_count] < *r_elem;
844
- break;
845
-
846
- case EW_GT:
847
- res_elems[count] = l_elems[l_count] > *r_elem;
848
- break;
849
-
850
- case EW_LEQ:
851
- res_elems[count] = l_elems[l_count] <= *r_elem;
852
- break;
853
-
854
- case EW_GEQ:
855
- res_elems[count] = l_elems[l_count] >= *r_elem;
856
- break;
857
-
858
- default:
859
- rb_raise(rb_eStandardError, "this should not happen");
860
- }
861
- }
862
-
863
- }
864
- }
865
- return result;
866
- }
867
797
 
868
798
 
869
799
  /*