nmatrix 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/History.txt +102 -10
  3. data/README.rdoc +24 -32
  4. data/Rakefile +1 -1
  5. data/ext/nmatrix/data/complex.h +9 -0
  6. data/ext/nmatrix/data/data.cpp +78 -4
  7. data/ext/nmatrix/data/data.h +86 -54
  8. data/ext/nmatrix/data/rational.h +2 -0
  9. data/ext/nmatrix/data/ruby_object.h +38 -8
  10. data/ext/nmatrix/extconf.rb +13 -7
  11. data/ext/nmatrix/nmatrix.cpp +262 -139
  12. data/ext/nmatrix/nmatrix.h +11 -4
  13. data/ext/nmatrix/storage/common.cpp +20 -13
  14. data/ext/nmatrix/storage/common.h +18 -12
  15. data/ext/nmatrix/storage/dense.cpp +122 -192
  16. data/ext/nmatrix/storage/dense.h +4 -2
  17. data/ext/nmatrix/storage/list.cpp +467 -636
  18. data/ext/nmatrix/storage/list.h +6 -3
  19. data/ext/nmatrix/storage/storage.cpp +83 -46
  20. data/ext/nmatrix/storage/storage.h +7 -7
  21. data/ext/nmatrix/storage/yale.cpp +621 -361
  22. data/ext/nmatrix/storage/yale.h +21 -9
  23. data/ext/nmatrix/ttable_helper.rb +27 -31
  24. data/ext/nmatrix/types.h +1 -1
  25. data/ext/nmatrix/util/math.cpp +9 -10
  26. data/ext/nmatrix/util/sl_list.cpp +1 -7
  27. data/ext/nmatrix/util/sl_list.h +0 -118
  28. data/lib/nmatrix/blas.rb +59 -18
  29. data/lib/nmatrix/monkeys.rb +0 -52
  30. data/lib/nmatrix/nmatrix.rb +136 -9
  31. data/lib/nmatrix/nvector.rb +33 -0
  32. data/lib/nmatrix/shortcuts.rb +95 -16
  33. data/lib/nmatrix/version.rb +1 -1
  34. data/lib/nmatrix/yale_functions.rb +25 -19
  35. data/spec/blas_spec.rb +1 -19
  36. data/spec/elementwise_spec.rb +132 -17
  37. data/spec/lapack_spec.rb +0 -3
  38. data/spec/nmatrix_list_spec.rb +18 -0
  39. data/spec/nmatrix_spec.rb +44 -18
  40. data/spec/nmatrix_yale_spec.rb +1 -3
  41. data/spec/shortcuts_spec.rb +26 -36
  42. data/spec/slice_spec.rb +2 -4
  43. metadata +2 -2
@@ -136,10 +136,10 @@
136
136
 
137
137
  #define NM_DEF_STORAGE_ELEMENTS \
138
138
  NM_DECL_ENUM(dtype_t, dtype); \
139
- size_t dim; \
140
- size_t* shape; \
141
- size_t* offset; \
142
- int count; \
139
+ size_t dim; \
140
+ size_t* shape; \
141
+ size_t* offset; \
142
+ int count; \
143
143
  STORAGE* src;
144
144
 
145
145
  #define NM_DEF_STORAGE_CHILD_STRUCT_PRE(name) struct name : STORAGE {
@@ -312,6 +312,7 @@ NM_DEF_STRUCT_POST(NMATRIX); // };
312
312
  #define NM_SHAPE(val,i) (NM_STORAGE(val)->shape[(i)])
313
313
  #define NM_SHAPE0(val) (NM_STORAGE(val)->shape[0])
314
314
  #define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
315
+ #define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
315
316
 
316
317
  #define NM_DENSE_COUNT(val) (storage_count_max_elements(NM_STORAGE_DENSE(val)))
317
318
  #define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)])
@@ -355,6 +356,12 @@ extern "C" {
355
356
  VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length);
356
357
 
357
358
  NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function)
359
+ NM_DECL_ENUM(dtype_t, nm_dtype_min(VALUE));
360
+
361
+ // Non-API functions needed by other cpp files.
362
+ NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage);
363
+ void nm_delete(NMATRIX* mat);
364
+ void nm_delete_ref(NMATRIX* mat);
358
365
 
359
366
  #ifdef __cplusplus
360
367
  }
@@ -52,19 +52,26 @@
52
52
  */
53
53
 
54
54
  extern "C" {
55
- /*
56
- * Calculate the number of elements in the dense storage structure, based on
57
- * shape and dim.
58
- */
59
- size_t nm_storage_count_max_elements(const STORAGE* storage) {
60
- unsigned int i;
61
- size_t count = 1;
62
-
63
- for (i = storage->dim; i-- > 0;) {
64
- count *= storage->shape[i];
55
+ /*
56
+ * Calculate the number of elements in the dense storage structure, based on
57
+ * shape and dim.
58
+ */
59
+ size_t nm_storage_count_max_elements(const STORAGE* storage) {
60
+ unsigned int i;
61
+ size_t count = 1;
62
+
63
+ for (i = storage->dim; i-- > 0;) {
64
+ count *= storage->shape[i];
65
+ }
66
+
67
+ return count;
68
+ }
69
+
70
+ // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
71
+ // the matrix's storage.
72
+ VALUE nm_enumerator_length(VALUE nmatrix) {
73
+ long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
74
+ return LONG2NUM(len);
65
75
  }
66
-
67
- return count;
68
- }
69
76
 
70
77
  } // end of extern "C" block
@@ -32,6 +32,8 @@
32
32
  * Standard Includes
33
33
  */
34
34
 
35
+ #include <cmath> // pow().
36
+
35
37
  /*
36
38
  * Project Includes
37
39
  */
@@ -41,7 +43,7 @@
41
43
  /*
42
44
  * Macros
43
45
  */
44
-
46
+
45
47
  extern "C" {
46
48
 
47
49
  /*
@@ -69,6 +71,7 @@ struct SLICE {
69
71
  */
70
72
 
71
73
  size_t nm_storage_count_max_elements(const STORAGE* storage);
74
+ VALUE nm_enumerator_length(VALUE nmatrix);
72
75
 
73
76
  } // end of extern "C" block
74
77
 
@@ -78,19 +81,22 @@ namespace nm {
78
81
  * Templated helper function for element-wise operations, used by dense, yale, and list.
79
82
  */
80
83
  template <ewop_t op, typename LDType, typename RDType>
81
- inline LDType ew_op_switch(LDType left, RDType right) {
84
+ inline VALUE ew_op_switch(LDType left, RDType right) {
82
85
  switch (op) {
83
86
  case EW_ADD:
84
- return left + right;
87
+ return RubyObject(left + right).rval;
85
88
 
86
89
  case EW_SUB:
87
- return left - right;
90
+ return RubyObject(left - right).rval;
88
91
 
89
92
  case EW_MUL:
90
- return left * right;
93
+ return RubyObject(left * right).rval;
91
94
 
92
95
  case EW_DIV:
93
- return left / right;
96
+ return RubyObject(left / right).rval;
97
+
98
+ case EW_POW:
99
+ return RubyObject(pow(left, right)).rval;
94
100
 
95
101
  case EW_MOD:
96
102
  rb_raise(rb_eNotImpError, "Element-wise modulo is currently not supported.");
@@ -99,11 +105,11 @@ namespace nm {
99
105
  default:
100
106
  rb_raise(rb_eStandardError, "This should not happen.");
101
107
  }
102
- return 0;
108
+ return Qnil;
103
109
  }
104
110
 
105
111
  #define EWOP_INT_INT_DIV(ltype, rtype) template <> \
106
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
112
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
107
113
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
108
114
  if ((left > 0 && right > 0) || (left < 0 && right < 0)) \
109
115
  return left / right; \
@@ -112,27 +118,27 @@ namespace nm {
112
118
  }
113
119
 
114
120
  #define EWOP_UINT_UINT_DIV(ltype, rtype) template <> \
115
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
121
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
116
122
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
117
123
  return left / right; \
118
124
  }
119
125
 
120
126
  #define EWOP_INT_UINT_DIV(ltype, rtype) template <> \
121
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
127
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
122
128
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
123
129
  if (left > 0 ) return left / right; \
124
130
  else return ( ltype )(std::floor((double)(left) / (double)(right))); \
125
131
  }
126
132
 
127
133
  #define EWOP_UINT_INT_DIV(ltype, rtype) template <> \
128
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
134
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
129
135
  if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \
130
136
  if (right > 0) return left / right; \
131
137
  else return ( ltype )(std::floor((double)(left) / (double)(right))); \
132
138
  }
133
139
 
134
140
  #define EWOP_FLOAT_INT_DIV(ltype, rtype) template <> \
135
- inline ltype ew_op_switch<EW_DIV>( ltype left, rtype right) { \
141
+ inline VALUE ew_op_switch<EW_DIV>( ltype left, rtype right) { \
136
142
  return left / (ltype)(right); \
137
143
  }
138
144
 
@@ -60,12 +60,9 @@ namespace nm { namespace dense_storage {
60
60
 
61
61
  template <typename LDType, typename RDType>
62
62
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
63
-
63
+
64
64
  template <typename LDType, typename RDType>
65
65
  bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
66
-
67
- template <ewop_t op, typename LDType, typename RDType>
68
- static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar);
69
66
 
70
67
  template <typename DType>
71
68
  static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
@@ -132,7 +129,7 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
132
129
 
133
130
  if (elements_length == count) {
134
131
  s->elements = elements;
135
-
132
+
136
133
  } else {
137
134
  s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
138
135
 
@@ -145,12 +142,12 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
145
142
  if (i + elements_length > count) {
146
143
  copy_length = count - i;
147
144
  }
148
-
145
+
149
146
  memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
150
147
  }
151
148
 
152
149
  // Get rid of the init_val.
153
- free(elements);
150
+ xfree(elements);
154
151
  }
155
152
  }
156
153
 
@@ -210,20 +207,85 @@ void nm_dense_storage_mark(void* storage_base) {
210
207
  ///////////////
211
208
 
212
209
 
213
- // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
214
- // the matrix's storage.
215
- static VALUE nm_dense_enumerator_length(VALUE nmatrix) {
216
- long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix));
217
- return LONG2NUM(len);
210
+
211
+ /*
212
+ * map_pair iterator for dense matrices (for element-wise operations)
213
+ */
214
+ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
215
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
216
+ *t = NM_STORAGE_DENSE(right);
217
+
218
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
219
+
220
+ size_t* coords = ALLOCA_N(size_t, s->dim);
221
+ memset(coords, 0, sizeof(size_t) * s->dim);
222
+
223
+ size_t *shape_copy = ALLOC_N(size_t, s->dim);
224
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
225
+
226
+ size_t count = nm_storage_count_max_elements(s);
227
+
228
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
229
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
230
+
231
+ for (size_t k = 0; k < count; ++k) {
232
+ nm_dense_storage_coords(result, k, coords);
233
+ size_t s_index = nm_dense_storage_pos(s, coords),
234
+ t_index = nm_dense_storage_pos(t, coords);
235
+
236
+ VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
237
+ VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
238
+
239
+ result_elem[k] = rb_yield_values(2, sval, tval);
240
+ }
241
+
242
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
243
+
244
+ return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
245
+ }
246
+
247
+
248
+ /*
249
+ * map enumerator for dense matrices.
250
+ */
251
+ VALUE nm_dense_map(VALUE self) {
252
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
253
+
254
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
255
+
256
+ size_t* coords = ALLOCA_N(size_t, s->dim);
257
+ memset(coords, 0, sizeof(size_t) * s->dim);
258
+
259
+ size_t *shape_copy = ALLOC_N(size_t, s->dim);
260
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
261
+
262
+ size_t count = nm_storage_count_max_elements(s);
263
+
264
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
265
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
266
+
267
+ for (size_t k = 0; k < count; ++k) {
268
+ nm_dense_storage_coords(result, k, coords);
269
+ size_t s_index = nm_dense_storage_pos(s, coords);
270
+
271
+ result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
272
+ }
273
+
274
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
275
+
276
+ return Data_Wrap_Struct(CLASS_OF(self), nm_dense_storage_mark, nm_delete, m);
218
277
  }
219
278
 
220
279
 
280
+ /*
281
+ * each_with_indices iterator for dense matrices.
282
+ */
221
283
  VALUE nm_dense_each_with_indices(VALUE nmatrix) {
222
284
  volatile VALUE nm = nmatrix;
223
285
 
224
286
  DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
225
287
 
226
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_dense_enumerator_length); // fourth argument only used by Ruby2+
288
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
227
289
 
228
290
  // Create indices and initialize them to zero
229
291
  size_t* coords = ALLOCA_N(size_t, s->dim);
@@ -269,7 +331,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
269
331
  volatile VALUE nm = nmatrix; // Not sure this actually does anything.
270
332
  DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
271
333
 
272
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_storage_count_max_elements(s));
334
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length);
273
335
 
274
336
  size_t* temp_coords = ALLOCA_N(size_t, s->dim);
275
337
  size_t sliced_index;
@@ -280,10 +342,11 @@ VALUE nm_dense_each(VALUE nmatrix) {
280
342
  if (NM_DTYPE(nm) == nm::RUBYOBJ) {
281
343
 
282
344
  // matrix of Ruby objects -- yield those objects directly
283
- for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i)
345
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
284
346
  nm_dense_storage_coords(sliced_dummy, i, temp_coords);
285
347
  sliced_index = nm_dense_storage_pos(s, temp_coords);
286
348
  rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
349
+ }
287
350
 
288
351
  } else {
289
352
 
@@ -300,7 +363,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
300
363
  nm_dense_storage_delete(sliced_dummy);
301
364
 
302
365
  return nmatrix;
303
-
366
+
304
367
  }
305
368
 
306
369
 
@@ -321,13 +384,13 @@ void* nm_dense_storage_get(STORAGE* storage, SLICE* slice) {
321
384
  shape[i] = slice->lengths[i];
322
385
  }
323
386
 
324
- ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
387
+ ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
325
388
 
326
- slice_copy(ns,
327
- reinterpret_cast<const DENSE_STORAGE*>(s->src),
328
- slice->lengths,
329
- 0,
330
- nm_dense_storage_pos(s, slice->coords),
389
+ slice_copy(ns,
390
+ reinterpret_cast<const DENSE_STORAGE*>(s->src),
391
+ slice->lengths,
392
+ 0,
393
+ nm_dense_storage_pos(s, slice->coords),
331
394
  0);
332
395
  return ns;
333
396
  }
@@ -343,7 +406,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
343
406
 
344
407
  if (slice->single)
345
408
  return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
346
-
409
+
347
410
  else {
348
411
  DENSE_STORAGE* ns = ALLOC( DENSE_STORAGE );
349
412
  ns->dim = s->dim;
@@ -358,7 +421,7 @@ void* nm_dense_storage_ref(STORAGE* storage, SLICE* slice) {
358
421
 
359
422
  ns->stride = s->stride;
360
423
  ns->elements = s->elements;
361
-
424
+
362
425
  s->src->count++;
363
426
  ns->src = s->src;
364
427
 
@@ -387,8 +450,13 @@ void nm_dense_storage_set(STORAGE* storage, SLICE* slice, void* val) {
387
450
  * have the same dtype.
388
451
  */
389
452
  bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
390
- LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*);
391
-
453
+ LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
454
+
455
+ if (!ttable[left->dtype][right->dtype]) {
456
+ rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
457
+ return false;
458
+ }
459
+
392
460
  return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
393
461
  }
394
462
 
@@ -399,10 +467,10 @@ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
399
467
  bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
400
468
  if (mat->dtype == nm::COMPLEX64) {
401
469
  return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
402
-
470
+
403
471
  } else if (mat->dtype == nm::COMPLEX128) {
404
472
  return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
405
-
473
+
406
474
  } else {
407
475
  return nm_dense_storage_is_symmetric(mat, lda);
408
476
  }
@@ -413,7 +481,7 @@ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
413
481
  */
414
482
  bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
415
483
  DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
416
-
484
+
417
485
  return ttable[mat->dtype](mat, lda);
418
486
  }
419
487
 
@@ -421,24 +489,6 @@ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
421
489
  // Math //
422
490
  //////////
423
491
 
424
- /*
425
- * Dense matrix-matrix and matrix-scalar element-wise operations.
426
- *
427
- * right or rscalar should be NULL; they should not both be initialized. If right is NULL, it'll use the scalar value instead.
428
- */
429
- STORAGE* nm_dense_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar) {
430
- OP_LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::ew_op, DENSE_STORAGE*, const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void*);
431
-
432
- if (right)
433
- return ttable[op][left->dtype][right->dtype](reinterpret_cast<const DENSE_STORAGE*>(left), reinterpret_cast<const DENSE_STORAGE*>(right), NULL);
434
- else {
435
- nm::dtype_t r_dtype = nm_dtype_guess(scalar);
436
- void* r_scalar = ALLOCA_N(char, DTYPE_SIZES[r_dtype]);
437
- rubyval_to_cval(scalar, r_dtype, r_scalar);
438
-
439
- return ttable[op][left->dtype][r_dtype](reinterpret_cast<const DENSE_STORAGE*>(left), NULL, r_scalar);
440
- }
441
- }
442
492
 
443
493
  /*
444
494
  * Dense matrix-matrix multiplication.
@@ -468,9 +518,9 @@ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
468
518
  }
469
519
 
470
520
  /*
471
- * Determine the a set of slice coordinates from linear array position (in elements
521
+ * Determine the a set of slice coordinates from linear array position (in elements
472
522
  * of s) of some set of coordinates (given by slice). (Inverse of
473
- * nm_dense_storage_pos).
523
+ * nm_dense_storage_pos).
474
524
  *
475
525
  * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
476
526
  */
@@ -510,7 +560,7 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
510
560
  for (size_t i = 0; i < lengths[n]; ++i) {
511
561
  slice_copy(dest, src, lengths,
512
562
  pdest + dest->stride[n]*i,
513
- psrc + src->stride[n]*i,
563
+ psrc + src->stride[n]*i,
514
564
  n + 1);
515
565
  }
516
566
  } else {
@@ -528,9 +578,14 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
528
578
  /*
529
579
  * Copy dense storage, changing dtype if necessary.
530
580
  */
531
- STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
581
+ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
532
582
  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
533
583
 
584
+ if (!ttable[new_dtype][rhs->dtype]) {
585
+ rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
586
+ return NULL;
587
+ }
588
+
534
589
  return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
535
590
  }
536
591
 
@@ -538,7 +593,7 @@ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
538
593
  * Copy dense storage without a change in dtype.
539
594
  */
540
595
  DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
541
- size_t count = 0;
596
+ size_t count = 0;
542
597
  size_t *shape = ALLOC_N(size_t, rhs->dim);
543
598
 
544
599
  // copy shape and offset
@@ -593,6 +648,10 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
593
648
  nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
594
649
  } else {
595
650
  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
651
+
652
+ if (!ttable[lhs->dtype][rhs->dtype])
653
+ rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
654
+
596
655
  ttable[lhs->dtype][rhs->dtype](rhs, lhs);
597
656
  }
598
657
 
@@ -640,8 +699,7 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
640
699
 
641
700
  // Ensure that allocation worked before copying.
642
701
  if (lhs && count) {
643
- if (rhs->src != rhs) {
644
- /* Make a copy of a ref to a matrix. */
702
+ if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
645
703
 
646
704
  DENSE_STORAGE* tmp = nm_dense_storage_copy(rhs);
647
705
 
@@ -650,13 +708,12 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
650
708
  lhs_els[count] = tmp_els[count];
651
709
  }
652
710
  nm_dense_storage_delete(tmp);
653
- } else {
654
- /* Make a regular copy. */
655
711
 
712
+ } else { // Make a regular copy.
656
713
  while (count-- > 0) lhs_els[count] = rhs_els[count];
657
714
  }
658
715
  }
659
-
716
+
660
717
  return lhs;
661
718
  }
662
719
 
@@ -673,7 +730,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
673
730
  LDType* left_elements = (LDType*)left->elements;
674
731
  RDType* right_elements = (RDType*)right->elements;
675
732
 
676
- // Copy elements in temp matrix if you have refernce to the right.
733
+ // Copy elements in temp matrix if you have reference to the right.
677
734
  if (left->src != left) {
678
735
  tmp1 = nm_dense_storage_copy(left);
679
736
  left_elements = (LDType*)tmp1->elements;
@@ -682,7 +739,7 @@ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
682
739
  tmp2 = nm_dense_storage_copy(right);
683
740
  right_elements = (RDType*)tmp2->elements;
684
741
  }
685
-
742
+
686
743
 
687
744
 
688
745
  for (index = nm_storage_count_max_elements(left); index-- > 0;) {
@@ -704,20 +761,20 @@ template <typename DType>
704
761
  bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
705
762
  unsigned int i, j;
706
763
  register DType complex_conj;
707
-
764
+
708
765
  const DType* els = (DType*) mat->elements;
709
-
766
+
710
767
  for (i = mat->shape[0]; i-- > 0;) {
711
768
  for (j = i + 1; j < mat->shape[1]; ++j) {
712
769
  complex_conj = els[j*lda + 1];
713
770
  complex_conj.i = -complex_conj.i;
714
-
771
+
715
772
  if (els[i*lda+j] != complex_conj) {
716
773
  return false;
717
774
  }
718
775
  }
719
776
  }
720
-
777
+
721
778
  return true;
722
779
  }
723
780
 
@@ -725,7 +782,7 @@ template <typename DType>
725
782
  bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
726
783
  unsigned int i, j;
727
784
  const DType* els = (DType*) mat->elements;
728
-
785
+
729
786
  for (i = mat->shape[0]; i-- > 0;) {
730
787
  for (j = i + 1; j < mat->shape[1]; ++j) {
731
788
  if (els[i*lda+j] != els[j*lda+i]) {
@@ -733,137 +790,10 @@ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
733
790
  }
734
791
  }
735
792
  }
736
-
793
+
737
794
  return true;
738
795
  }
739
796
 
740
- /*
741
- * Templated dense storage element-wise operations which return the same DType.
742
- */
743
- template <ewop_t op, typename LDType, typename RDType>
744
- static DENSE_STORAGE* ew_op(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const void* rscalar) {
745
- unsigned int count;
746
- size_t l_count;
747
- size_t r_count;
748
-
749
- size_t* temp_coords = ALLOCA_N(size_t, left->dim);
750
-
751
- size_t* new_shape = ALLOC_N(size_t, left->dim);
752
- memcpy(new_shape, left->shape, sizeof(size_t) * left->dim);
753
-
754
- // Determine the return dtype. This depends on the type of operation we're doing. Usually, it's going to be
755
- // set by the left matrix, but for comparisons, we'll use BYTE (in lieu of boolean).
756
- dtype_t new_dtype = static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS ? left->dtype : BYTE;
757
-
758
- DENSE_STORAGE* result = nm_dense_storage_create(new_dtype, new_shape, left->dim, NULL, 0);
759
-
760
- LDType* l_elems = reinterpret_cast<LDType*>(left->elements);
761
-
762
- if (right) { // matrix-matrix operation
763
- RDType* r_elems = reinterpret_cast<RDType*>(right->elements);
764
-
765
- if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
766
-
767
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
768
- nm_dense_storage_coords(result, count, temp_coords);
769
- l_count = nm_dense_storage_pos(left, temp_coords);
770
- r_count = nm_dense_storage_pos(right, temp_coords);
771
-
772
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], r_elems[r_count]);
773
- }
774
-
775
- } else { // new_dtype is BYTE: comparison operators
776
- uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
777
-
778
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
779
- nm_dense_storage_coords(result, count, temp_coords);
780
- l_count = nm_dense_storage_pos(left, temp_coords);
781
- r_count = nm_dense_storage_pos(right, temp_coords);
782
-
783
- switch (op) {
784
- case EW_EQEQ:
785
- res_elems[count] = l_elems[l_count] == r_elems[r_count];
786
- break;
787
-
788
- case EW_NEQ:
789
- res_elems[count] = l_elems[l_count] != r_elems[r_count];
790
- break;
791
-
792
- case EW_LT:
793
- res_elems[count] = l_elems[l_count] < r_elems[r_count];
794
- break;
795
-
796
- case EW_GT:
797
- res_elems[count] = l_elems[l_count] > r_elems[r_count];
798
- break;
799
-
800
- case EW_LEQ:
801
- res_elems[count] = l_elems[l_count] <= r_elems[r_count];
802
- break;
803
-
804
- case EW_GEQ:
805
- res_elems[count] = l_elems[l_count] >= r_elems[r_count];
806
- break;
807
-
808
- default:
809
- rb_raise(rb_eStandardError, "this should not happen");
810
- }
811
- }
812
- }
813
-
814
- } else { // matrix-scalar operation
815
- const RDType* r_elem = reinterpret_cast<const RDType*>(rscalar);
816
-
817
- if (static_cast<uint8_t>(op) < NUM_NONCOMP_EWOPS) { // use left-dtype
818
-
819
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
820
- nm_dense_storage_coords(result, count, temp_coords);
821
- l_count = nm_dense_storage_pos(left, temp_coords);
822
-
823
- reinterpret_cast<LDType*>(result->elements)[count] = ew_op_switch<op,LDType,RDType>(l_elems[l_count], *r_elem);
824
- }
825
-
826
- } else {
827
- uint8_t* res_elems = reinterpret_cast<uint8_t*>(result->elements);
828
-
829
- for (count = nm_storage_count_max_elements(result); count-- > 0;) {
830
- nm_dense_storage_coords(result, count, temp_coords);
831
- l_count = nm_dense_storage_pos(left, temp_coords);
832
-
833
- switch (op) {
834
- case EW_EQEQ:
835
- res_elems[count] = l_elems[l_count] == *r_elem;
836
- break;
837
-
838
- case EW_NEQ:
839
- res_elems[count] = l_elems[l_count] != *r_elem;
840
- break;
841
-
842
- case EW_LT:
843
- res_elems[count] = l_elems[l_count] < *r_elem;
844
- break;
845
-
846
- case EW_GT:
847
- res_elems[count] = l_elems[l_count] > *r_elem;
848
- break;
849
-
850
- case EW_LEQ:
851
- res_elems[count] = l_elems[l_count] <= *r_elem;
852
- break;
853
-
854
- case EW_GEQ:
855
- res_elems[count] = l_elems[l_count] >= *r_elem;
856
- break;
857
-
858
- default:
859
- rb_raise(rb_eStandardError, "this should not happen");
860
- }
861
- }
862
-
863
- }
864
- }
865
- return result;
866
- }
867
797
 
868
798
 
869
799
  /*