nmatrix 0.0.5 → 0.0.6

Files changed (43)
  1. checksums.yaml +4 -4
  2. data/History.txt +102 -10
  3. data/README.rdoc +24 -32
  4. data/Rakefile +1 -1
  5. data/ext/nmatrix/data/complex.h +9 -0
  6. data/ext/nmatrix/data/data.cpp +78 -4
  7. data/ext/nmatrix/data/data.h +86 -54
  8. data/ext/nmatrix/data/rational.h +2 -0
  9. data/ext/nmatrix/data/ruby_object.h +38 -8
  10. data/ext/nmatrix/extconf.rb +13 -7
  11. data/ext/nmatrix/nmatrix.cpp +262 -139
  12. data/ext/nmatrix/nmatrix.h +11 -4
  13. data/ext/nmatrix/storage/common.cpp +20 -13
  14. data/ext/nmatrix/storage/common.h +18 -12
  15. data/ext/nmatrix/storage/dense.cpp +122 -192
  16. data/ext/nmatrix/storage/dense.h +4 -2
  17. data/ext/nmatrix/storage/list.cpp +467 -636
  18. data/ext/nmatrix/storage/list.h +6 -3
  19. data/ext/nmatrix/storage/storage.cpp +83 -46
  20. data/ext/nmatrix/storage/storage.h +7 -7
  21. data/ext/nmatrix/storage/yale.cpp +621 -361
  22. data/ext/nmatrix/storage/yale.h +21 -9
  23. data/ext/nmatrix/ttable_helper.rb +27 -31
  24. data/ext/nmatrix/types.h +1 -1
  25. data/ext/nmatrix/util/math.cpp +9 -10
  26. data/ext/nmatrix/util/sl_list.cpp +1 -7
  27. data/ext/nmatrix/util/sl_list.h +0 -118
  28. data/lib/nmatrix/blas.rb +59 -18
  29. data/lib/nmatrix/monkeys.rb +0 -52
  30. data/lib/nmatrix/nmatrix.rb +136 -9
  31. data/lib/nmatrix/nvector.rb +33 -0
  32. data/lib/nmatrix/shortcuts.rb +95 -16
  33. data/lib/nmatrix/version.rb +1 -1
  34. data/lib/nmatrix/yale_functions.rb +25 -19
  35. data/spec/blas_spec.rb +1 -19
  36. data/spec/elementwise_spec.rb +132 -17
  37. data/spec/lapack_spec.rb +0 -3
  38. data/spec/nmatrix_list_spec.rb +18 -0
  39. data/spec/nmatrix_spec.rb +44 -18
  40. data/spec/nmatrix_yale_spec.rb +1 -3
  41. data/spec/shortcuts_spec.rb +26 -36
  42. data/spec/slice_spec.rb +2 -4
  43. metadata +2 -2
@@ -82,7 +82,7 @@ extern "C" {
  // Accessors //
  ///////////////

- VALUE nm_list_each_stored_with_indices(VALUE nmatrix);
+ VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored);
  void* nm_list_storage_ref(STORAGE* s, SLICE* slice);
  void* nm_list_storage_get(STORAGE* s, SLICE* slice);
  void* nm_list_storage_insert(STORAGE* s, SLICE* slice, void* val);
@@ -98,7 +98,6 @@ extern "C" {
  // Math //
  //////////

- STORAGE* nm_list_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar);
  STORAGE* nm_list_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);

@@ -122,9 +121,13 @@ extern "C" {

  LIST_STORAGE* nm_list_storage_copy(const LIST_STORAGE* rhs);
  STORAGE* nm_list_storage_copy_transposed(const STORAGE* rhs_base);
- STORAGE* nm_list_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype);
+ STORAGE* nm_list_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void*);
  VALUE nm_list_storage_to_hash(const LIST_STORAGE* s, const nm::dtype_t dtype);

+ // Exposed functions
+ VALUE nm_to_hash(VALUE self);
+ VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init);
+ VALUE nm_list_default_value(VALUE self);

  } // end of extern "C" block

  #endif // LIST_H
@@ -161,7 +161,7 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
  for (RIType j = 0; j < rhs->shape[1]; ++j) { // Move to next dense position.

  // Fill in zeros (except for diagonal)
- if (i == j) lhs_elements[pos] = rhs_a[i];
+ if (i == j) lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
  else lhs_elements[pos] = LCAST_ZERO;

  ++pos;
@@ -173,10 +173,10 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype

  for (size_t j = 0; j < rhs->shape[1]; ++j) {
  if (i == j) {
- lhs_elements[pos] = rhs_a[i];
+ lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);

  } else if (j == jj) {
- lhs_elements[pos] = rhs_a[ija]; // Copy from rhs.
+ lhs_elements[pos] = static_cast<LDType>(rhs_a[ija]); // Copy from rhs.

  // Get next.
  ++ija;
@@ -214,14 +214,14 @@ static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* defaul

  if (!curr || (curr->key > (size_t)(last_key+1))) {

- if (recursions == 0) lhs[pos] = *default_val;
+ if (recursions == 0) lhs[pos] = static_cast<LDType>(*default_val);
  else cast_copy_list_default<LDType,RDType>(lhs, default_val, pos, shape, dim, max_elements, recursions-1);

  ++last_key;

  } else {

- if (recursions == 0) lhs[pos] = *reinterpret_cast<RDType*>(curr->val);
+ if (recursions == 0) lhs[pos] = static_cast<LDType>(*reinterpret_cast<RDType*>(curr->val));
  else cast_copy_list_contents<LDType,RDType>(lhs, (const LIST*)(curr->val),
  default_val, pos, shape, dim, max_elements, recursions-1);

@@ -240,7 +240,7 @@ template <typename LDType,typename RDType>
  static void cast_copy_list_default(LDType* lhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions) {
  for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) {

- if (recursions == 0) lhs[pos] = *default_val;
+ if (recursions == 0) lhs[pos] = static_cast<LDType>(*default_val);
  else cast_copy_list_default<LDType,RDType>(lhs, default_val, pos, shape, dim, max_elements, recursions-1);

  }
@@ -261,7 +261,7 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,
  * Creation of list storage from dense storage.
  */
  template <typename LDType, typename RDType>
- LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype) {
+ LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {

  LDType* l_default_val = ALLOC_N(LDType, 1);
  RDType* r_default_val = ALLOCA_N(RDType, 1); // clean up when finished with this function
@@ -274,13 +274,16 @@ LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtyp
  memset(coords, 0, rhs->dim * sizeof(size_t));

  // set list default_val to 0
- if (l_dtype == RUBYOBJ) *l_default_val = INT2FIX(0);
- else *l_default_val = 0;
+ if (init) *l_default_val = *reinterpret_cast<LDType*>(init);
+ else {
+ if (l_dtype == RUBYOBJ) *l_default_val = INT2FIX(0);
+ else *l_default_val = 0;
+ }

  // need test default value for comparing to elements in dense matrix
- if (rhs->dtype == l_dtype) *r_default_val = *l_default_val;
- else if (rhs->dtype == RUBYOBJ) *r_default_val = INT2FIX(0);
- else *r_default_val = 0;
+ if (rhs->dtype == l_dtype || rhs->dtype != RUBYOBJ) *r_default_val = static_cast<RDType>(*l_default_val);
+ else *r_default_val = rubyobj_from_cval(l_default_val, l_dtype);
+

  LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, l_default_val);

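Note: dense→list conversion now threads an explicit `init` default through instead of hard-coding zero. A minimal standalone sketch of that fallback rule (the `choose_default` helper is hypothetical, not part of nmatrix's API):

    // Pick the target storage's default value: honor a caller-supplied init
    // pointer when present, otherwise fall back to the historical zero.
    template <typename LDType>
    LDType choose_default(const void* init) {
      if (init) return *reinterpret_cast<const LDType*>(init); // caller-supplied default
      return LDType(0);                                        // previous behavior
    }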
@@ -320,7 +323,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)

  // copy default value from the zero location in the Yale matrix
  LDType* default_val = ALLOC_N(LDType, 1);
- *default_val = R_ZERO;
+ *default_val = static_cast<LDType>(R_ZERO);

  LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, default_val);

@@ -353,8 +356,8 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
  // Is there a nonzero diagonal item between the previously added item and the current one?
  if (jj > i && add_diag) {
  // Allocate and copy insertion value
- insert_val = ALLOC_N(LDType, 1);
- *insert_val = rhs_a[i];
+ insert_val = ALLOC_N(LDType, 1);
+ *insert_val = static_cast<LDType>(rhs_a[i]);

  // insert the item in the list at the appropriate location
  if (last_added) last_added = list::insert_after(last_added, i, insert_val);
@@ -366,7 +369,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)

  // now allocate and add the current item
  insert_val = ALLOC_N(LDType, 1);
- *insert_val = rhs_a[ija];
+ *insert_val = static_cast<LDType>(rhs_a[ija]);

  if (last_added) last_added = list::insert_after(last_added, jj, insert_val);
  else last_added = list::insert(curr_row, false, jj, insert_val);
@@ -376,8 +379,8 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)

  if (add_diag) {
  // still haven't added the diagonal.
- insert_val = ALLOC_N(LDType, 1);
- *insert_val = rhs_a[i];
+ insert_val = ALLOC_N(LDType, 1);
+ *insert_val = static_cast<LDType>(rhs_a[i]);

  // insert the item in the list at the appropriate location
  if (last_added) last_added = list::insert_after(last_added, i, insert_val);
@@ -417,7 +420,7 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,

  // Create a copy of our value that we will insert in the list
  LDType* insert_value = ALLOC_N(LDType, 1);
- *insert_value = (LDType)(rhs[pos]);
+ *insert_value = static_cast<LDType>(rhs[pos]);

  if (!lhs->first) prev = list::insert(lhs, false, coords[dim-1-recursions], insert_value);
  else prev = list::insert_after(prev, coords[dim-1-recursions], insert_value);
@@ -454,16 +457,19 @@ namespace yale_storage { // FIXME: Move to yale.cpp
  * Creation of yale storage from dense storage.
  */
  template <typename LDType, typename RDType, typename LIType>
- YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype) {
+ YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {
  if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");

  LIType pos = 0;
  LIType ndnz = 0;

- RDType R_ZERO; // need zero for easier comparisons
- if (rhs->dtype == RUBYOBJ) R_ZERO = INT2FIX(0);
- else R_ZERO = 0;
-
+ // We need a zero value. This should nearly always be zero, but sometimes you might want false or nil.
+ LDType L_INIT(0);
+ if (init) {
+ if (l_dtype == RUBYOBJ) L_INIT = *reinterpret_cast<VALUE*>(init);
+ else L_INIT = rubyobj_from_cval(init, rhs->dtype);
+ }
+ RDType R_INIT = static_cast<RDType>(L_INIT);

  RDType* rhs_elements = reinterpret_cast<RDType*>(rhs->elements);

@@ -471,7 +477,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
  for (size_t i = rhs->shape[0]; i-- > 0;) {
  for (size_t j = rhs->shape[1]; j-- > 0;) {
  pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]);
- if (i != j && rhs_elements[pos] != R_ZERO) ++ndnz;
+ if (i != j && rhs_elements[pos] != R_INIT) ++ndnz;

  // move forward 1 position in dense matrix elements array
  }
@@ -494,7 +500,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
  LIType* lhs_ija = reinterpret_cast<LIType*>(lhs->ija);

  // Set the zero position in the yale matrix
- lhs_a[shape[0]] = R_ZERO;
+ lhs_a[shape[0]] = L_INIT;

  // Start just after the zero position.
  LIType ija = shape[0]+1;
@@ -510,11 +516,10 @@ namespace yale_storage { // FIXME: Move to yale.cpp
  pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]); // calc position with offsets

  if (i == j) { // copy to diagonal
- lhs_a[i] = rhs_elements[pos];
- } else if (rhs_elements[pos] != R_ZERO) { // copy nonzero to LU
+ lhs_a[i] = static_cast<LDType>(rhs_elements[pos]);
+ } else if (rhs_elements[pos] != R_INIT) { // copy nonzero to LU
  lhs_ija[ija] = j; // write column index
-
- lhs_a[ija] = rhs_elements[pos];
+ lhs_a[ija] = static_cast<LDType>(rhs_elements[pos]);

  ++ija;
  }
@@ -534,9 +539,12 @@ namespace yale_storage { // FIXME: Move to yale.cpp
  YALE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, nm::dtype_t l_dtype) {
  if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");

- if ((rhs->dtype == RUBYOBJ and (*reinterpret_cast<RubyObject*>(rhs->default_val)) == RubyObject(INT2FIX(0)))
- || strncmp(reinterpret_cast<const char*>(rhs->default_val), "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", DTYPE_SIZES[rhs->dtype]))
- rb_raise(nm_eStorageTypeError, "list matrix must have default value of 0 to convert to yale");
+ if (rhs->dtype == RUBYOBJ) {
+ VALUE init_val = *reinterpret_cast<VALUE*>(rhs->default_val);
+ if (rb_funcall(init_val, rb_intern("!="), 1, Qnil) == Qtrue && rb_funcall(init_val, rb_intern("!="), 1, Qfalse) == Qtrue && rb_funcall(init_val, rb_intern("!="), 1, INT2FIX(0)) == Qtrue)
+ rb_raise(nm_eStorageTypeError, "list matrix of Ruby objects must have default value equal to 0, nil, or false to convert to yale");
+ } else if (strncmp(reinterpret_cast<const char*>(rhs->default_val), "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", DTYPE_SIZES[rhs->dtype]))
+ rb_raise(nm_eStorageTypeError, "list matrix of non-Ruby objects must have default value of 0 to convert to yale");


  size_t ndnz = nm_list_storage_count_nd_elements(rhs);
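Note: Yale storage keeps its default in a fixed slot and assumes it is zero-like, so a list matrix is only convertible when its default is 0 — or, for Ruby-object matrices, 0, nil, or false, tested through each object's own != method, as in the hunk above. A standalone sketch of that test, assuming only the Ruby C API (`zero_like_p` is a hypothetical name):

    #include <ruby.h>

    // True if v equals 0, nil, or false according to v's own != method.
    static bool zero_like_p(VALUE v) {
      ID neq = rb_intern("!=");
      return rb_funcall(v, neq, 1, INT2FIX(0)) != Qtrue
          || rb_funcall(v, neq, 1, Qnil)      != Qtrue
          || rb_funcall(v, neq, 1, Qfalse)    != Qtrue;
    }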
@@ -552,7 +560,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
  rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", (unsigned long)request_capacity, (unsigned long)(lhs->capacity));

  // Initialize the A and IJA arrays
- init<LDType,LIType>(lhs);
+ init<LDType,LIType>(lhs, rhs->default_val);

  LIType* lhs_ija = reinterpret_cast<LIType*>(lhs->ija);
  LDType* lhs_a = reinterpret_cast<LDType*>(lhs->a);
@@ -602,7 +610,6 @@ namespace yale_storage { // FIXME: Move to yale.cpp

  extern "C" {

-
  /*
  * The following functions represent stype casts -- conversions from one
  * stype to another. Each of these is the C accessor for a templated C++
@@ -610,47 +617,77 @@ extern "C" {
  */


-
- STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype) {
- NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_dense_storage, YALE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t l_dtype);
+ STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void* init) {
+ NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_dense_storage, YALE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t l_dtype, void*);

  nm::itype_t itype = nm_yale_storage_default_itype((const YALE_STORAGE*)right);

- return (STORAGE*)ttable[l_dtype][right->dtype][itype]((const DENSE_STORAGE*)right, l_dtype);
+ if (!ttable[l_dtype][right->dtype][itype]) {
+ rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+ return NULL;
+ }
+
+ return (STORAGE*)ttable[l_dtype][right->dtype][itype]((const DENSE_STORAGE*)right, l_dtype, init);
  }

- STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype) {
+ STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_list_storage, YALE_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t l_dtype);

  nm::itype_t itype = nm_yale_storage_default_itype((const YALE_STORAGE*)right);

+ if (!ttable[l_dtype][right->dtype][itype]) {
+ rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+ return NULL;
+ }
+
  return (STORAGE*)ttable[l_dtype][right->dtype][itype]((const LIST_STORAGE*)right, l_dtype);
  }

- STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype) {
+ STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::create_from_list_storage, DENSE_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t l_dtype);

+ if (!ttable[l_dtype][right->dtype]) {
+ rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+ return NULL;
+ }
+
  return (STORAGE*)ttable[l_dtype][right->dtype]((const LIST_STORAGE*)right, l_dtype);
  }

- STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype) {
+ STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::create_from_yale_storage, DENSE_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t l_dtype);

  const YALE_STORAGE* casted_right = reinterpret_cast<const YALE_STORAGE*>(right);
+
+ if (!ttable[l_dtype][right->dtype][casted_right->itype]) {
+ rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+ return NULL;
+ }
+
  return reinterpret_cast<STORAGE*>(ttable[l_dtype][right->dtype][casted_right->itype](casted_right, l_dtype));
  }

- STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype) {
- NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_dense_storage, LIST_STORAGE*, const DENSE_STORAGE*, nm::dtype_t);
+ STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void* init) {
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_dense_storage, LIST_STORAGE*, const DENSE_STORAGE*, nm::dtype_t, void*);
+
+ if (!ttable[l_dtype][right->dtype]) {
+ rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+ return NULL;
+ }

- return (STORAGE*)ttable[l_dtype][right->dtype]((DENSE_STORAGE*)right, l_dtype);
+ return (STORAGE*)ttable[l_dtype][right->dtype]((DENSE_STORAGE*)right, l_dtype, init);
  }

- STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype) {
+ STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_yale_storage, LIST_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t l_dtype);

  const YALE_STORAGE* casted_right = reinterpret_cast<const YALE_STORAGE*>(right);

+ if (!ttable[l_dtype][right->dtype][casted_right->itype]) {
+ rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+ return NULL;
+ }
+
  return (STORAGE*)ttable[l_dtype][right->dtype][casted_right->itype](casted_right, l_dtype);
  }

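Note: each stype-cast accessor now checks its dispatch table before calling through it, so a dtype pair with no instantiated template raises nm_eDataTypeError instead of jumping through a null function pointer. A self-contained sketch of the guard pattern (plain C++ with a hypothetical 2x2 table, not the nmatrix macros):

    #include <cstdio>

    typedef double (*cast_fn)(double);
    static double identity(double x) { return x; }

    int main() {
      // 2x2 dispatch table; one cell deliberately empty to model an
      // undefined dtype-to-dtype cast.
      cast_fn ttable[2][2] = { { identity, NULL }, { identity, identity } };

      int l = 0, r = 1;
      if (!ttable[l][r]) {                    // guard before dispatch
        std::fprintf(stderr, "casting between these dtypes is undefined\n");
        return 1;
      }
      std::printf("%f\n", ttable[l][r](3.14));
      return 0;
    }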
@@ -34,7 +34,7 @@
  * Standard Includes
  */

- #include <stdlib.h>
+ #include <cstdlib>

  /*
  * Project Includes
@@ -86,12 +86,12 @@ extern "C" {
  // Copying and Casting //
  /////////////////////////

- STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype);
- STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype);
- STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype);
- STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype);
- STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype);
- STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype);
+ STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void*);
+ STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void*);
+ STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void*);
+ STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void*);
+ STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void*);
+ STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void*);

  } // end of extern "C" block

@@ -43,6 +43,11 @@
  #include <algorithm> // std::min
  #include <cstdio> // std::fprintf
  #include <iostream>
+ #include <array>
+
+ #define RB_P(OBJ) \
+ rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \
+ rb_funcall(rb_stderr, rb_intern("puts"), 1, rb_funcall(OBJ, rb_intern("inspect"), 0));

  /*
  * Project Includes
@@ -81,6 +86,9 @@ extern "C" {
  static YALE_STORAGE* nm_copy_alloc_struct(const YALE_STORAGE* rhs, const nm::dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
  static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::itype_t min_itype);

+ static void* default_value_ptr(const YALE_STORAGE* s);
+ static VALUE default_value(const YALE_STORAGE* s);
+
  /* Ruby-accessible functions */
  static VALUE nm_size(VALUE self);
  static VALUE nm_a(int argc, VALUE* argv, VALUE self);
@@ -91,7 +99,6 @@ extern "C" {
  static VALUE nm_ija(int argc, VALUE* argv, VALUE self);

  static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);
- static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self);


  } // end extern "C" block
@@ -107,6 +114,9 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r,
  template <typename LDType, typename RDType, typename IType>
  static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right);

+ template <typename LDType, typename RDType, typename IType>
+ static bool eqeq_different_defaults(const YALE_STORAGE* s, const LDType& s_init, const YALE_STORAGE* t, const RDType& t_init);
+
  template <typename IType>
  static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size);

@@ -127,8 +137,6 @@ static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void
  template <typename DType, typename IType>
  static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only);

- template <typename nm::ewop_t op, typename IType, typename DType>
- YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t dtype);

  /*
  * Functions
@@ -239,7 +247,7 @@ YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, void* r_ia, voi
  * Uses the left as a template for the creation of a new one.
  */
  template <typename DType, typename IType>
- YALE_STORAGE* create_merged(const YALE_STORAGE* left, const YALE_STORAGE* right) {
+ YALE_STORAGE* create_merged__(const YALE_STORAGE* left, const YALE_STORAGE* right) {
  char ins_type;

  size_t size = get_size<IType>(left);
@@ -305,7 +313,7 @@ YALE_STORAGE* create_merged(const YALE_STORAGE* left, const YALE_STORAGE* right)
  * Called when most YALE_STORAGE objects are created.
  */
  template <typename DType, typename IType>
- void init(YALE_STORAGE* s) {
+ void init(YALE_STORAGE* s, void* init_val) {
  IType IA_INIT = s->shape[0] + 1;

  IType* ija = reinterpret_cast<IType*>(s->ija);
@@ -314,7 +322,7 @@ void init(YALE_STORAGE* s) {
  ija[i] = IA_INIT; // set initial values for IJA
  }

- clear_diagonal_and_zero<DType>(s);
+ clear_diagonal_and_zero<DType>(s, init_val);
  }

  size_t max_size(YALE_STORAGE* s) {
@@ -376,7 +384,7 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
  rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);

  // Initialize the A and IJA arrays
- init<DType,IType>(ns);
+ init<DType,IType>(ns, default_value_ptr(storage));
  IType* dst_ija = reinterpret_cast<IType*>(ns->ija);
  DType* dst_a = reinterpret_cast<DType*>(ns->a);

@@ -516,6 +524,13 @@ char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
  */
  template <typename LDType, typename RDType, typename IType>
  static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) {
+ LDType l_init = reinterpret_cast<LDType*>(left->a )[left->shape[0] ];
+ RDType r_init = reinterpret_cast<RDType*>(right->a)[right->shape[0]];
+
+ // If the defaults are different between the two matrices, or if slicing is involved, use this other function instead:
+ if (l_init != r_init || left->src != left || right->src != right)
+ return eqeq_different_defaults<LDType,RDType,IType>(left, l_init, right, r_init);
+
  LDType* la = reinterpret_cast<LDType*>(left->a);
  RDType* ra = reinterpret_cast<RDType*>(right->a);

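Note: equality now short-circuits into eqeq_different_defaults whenever the two default values disagree or either operand is a slice (src != self). The general comparator conceptually walks every column and substitutes each side's default for missing entries; a simplified standalone model over one row (not the nmatrix implementation itself):

    #include <cstddef>

    // Compare two sparse rows given as sorted (index, value) pairs, where a
    // missing column takes that row's own default value.
    bool sparse_rows_equal(const size_t* ai, const double* av, size_t an, double adef,
                           const size_t* bi, const double* bv, size_t bn, double bdef,
                           size_t ncols) {
      size_t p = 0, q = 0;
      for (size_t j = 0; j < ncols; ++j) {
        double x = (p < an && ai[p] == j) ? av[p++] : adef;  // left value or default
        double y = (q < bn && bi[q] == j) ? bv[q++] : bdef;  // right value or default
        if (x != y) return false;
      }
      return true;
    }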
@@ -555,6 +570,8 @@ static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) {
  return true;
  }

+
+
  /*
  * Are two non-diagonal rows the same? We already know.
  */
@@ -573,6 +590,9 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType

  IType ja = std::min(l_ja, r_ja);

+ LDType LZERO = la[l->shape[0]];
+ RDType RZERO = ra[r->shape[0]];
+
  while (!(l_no_more && r_no_more)) {
  if (l_ja == r_ja) {

@@ -599,7 +619,7 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType

  } else if (l_no_more || ja < l_ja) {

- if (ra[r_ija] != 0) return false;
+ if (ra[r_ija] != RZERO) return false;

  ++r_ija;
  if (r_ija < r_ija_next) {
@@ -613,7 +633,7 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType

  } else if (r_no_more || ja < r_ja) {

- if (la[l_ija] != 0) return false;
+ if (la[l_ija] != LZERO) return false;

  ++l_ija;
  if (l_ija < l_ija_next) {
@@ -658,243 +678,6 @@ static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_nex
  #define YALE_IJ(s) (reinterpret_cast<IType*>(s->ija) + s->shape[0] + 1)
  #define YALE_COUNT(yale) (yale->ndnz + yale->shape[0])

- template <typename nm::ewop_t op, typename IType, typename DType>
- YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t dtype) {
- size_t init_capacity;
- size_t* new_shape;
-
- unsigned int da_index,
- la_index,
- ra_index,
-
- a_index_offset,
-
- la_row_max,
- ra_row_max,
-
- row_index;
-
- DType tmp_result;
-
- DType * la = reinterpret_cast<DType*> (left->a),
- * ra = reinterpret_cast<DType*>(right->a),
- * da;
-
- YALE_STORAGE* dest;
-
- new_shape = reinterpret_cast<size_t*>(ALLOC_N(size_t, 2));
- new_shape[0] = left->shape[0];
- new_shape[1] = left->shape[1];
-
- init_capacity = std::min(left->ndnz + right->ndnz + new_shape[0], new_shape[0] * new_shape[1]);
-
- dest = nm_yale_storage_create(dtype, new_shape, 2, init_capacity, left->itype);
- da = reinterpret_cast<DType*>(dest->a);
-
- // Calculate diagonal values.
- for (da_index = 0; da_index < dest->shape[0]; ++da_index) {
- da[da_index] = ew_op_switch<op, DType, DType>(la[da_index], ra[da_index]);
- }
-
- // Set the zero representation seperator.
- da[da_index] = typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0;
-
- /*
- * Calculate the offset between start of the A arrays and the non-diagonal
- * entries.
- */
- a_index_offset = dest->shape[0] + 1;
-
- // Re-base the A arrays.
- la = la + a_index_offset;
- ra = ra + a_index_offset;
- da = da + a_index_offset;
-
- // Initialize our A array indices.
- la_index = ra_index = da_index = 0;
-
- // Calculate the non-diagonal values.
- for (row_index = 0; row_index < dest->shape[0]; ++row_index) {
- /*
- * Each row.
- */
-
- printf("Row %d\n", row_index);
-
- // Get row bounds.
- la_row_max = YALE_IA( left)[row_index + 1] - a_index_offset;
- ra_row_max = YALE_IA(right)[row_index + 1] - a_index_offset;
-
- printf("Left : Row Start: %d - Row End %d\n", la_index + a_index_offset, la_row_max + a_index_offset);
- printf("Right : Row Start: %d - Row End %d\n", ra_index + a_index_offset, ra_row_max + a_index_offset);
-
- /*
- * Set this row's left bound (which is also the previous row's right
- * bound).
- */
- YALE_IA(dest)[row_index] = da_index + a_index_offset;
-
- printf("Left bound of row %d in destination: %d\n", (int)row_index, (int)YALE_IA(dest)[row_index]);
-
- // Iterate over non-diagonal entries in this row.
- while (la_index < la_row_max and ra_index < ra_row_max) {
- /*
- * Elements are present on both the left- and right-hand side.
- */
-
- printf("Marker 0\n");
-
- if (YALE_IJ(left)[la_index] == YALE_IJ(right)[ra_index]) {
- /*
- * Current left- and right-hand values are in the same row and
- * column.
- */
-
- printf("Calculating value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
-
- tmp_result = ew_op_switch<op, DType, DType>(la[la_index], ra[ra_index]);
-
- if (tmp_result != 0) {
- printf("Setting value for [%d, %d] at index %d in destination's A array.\n", (int)row_index, (int)YALE_IJ(left)[la_index], (int)(da_index + a_index_offset));
-
- da[da_index] = tmp_result;
- YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
-
- ++da_index;
-
- } else {
- printf("Result was 0. Skipping.\n");
- }
-
- ++la_index;
- ++ra_index;
-
- } else if (YALE_IJ(left)[la_index] < YALE_IJ(right)[ra_index]) {
- /*
- * The right-hand index is ahead of the left-hand index.
- */
-
- if (op != EW_MUL) {
- // If this is multiplion there is no point in doing the operation.
-
- tmp_result = ew_op_switch<op, DType, DType>(la[la_index], typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0);
-
- printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
-
- if (tmp_result != 0) {
- da[da_index] = tmp_result;
- YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
-
- ++da_index;
- }
- }
-
- ++la_index;
-
- } else {
- /*
- * The left-hand index is ahead of the right-hand index.
- */
-
- if (op != EW_MUL) {
- // If this is multiplion there is no point in doing the operation.
-
- tmp_result = ew_op_switch<op, DType, DType>(typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0, ra[ra_index]);
-
- printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(right)[ra_index]);
-
- if (tmp_result != 0) {
- da[da_index] = tmp_result;
- YALE_IJ(dest)[da_index] = YALE_IJ(right)[ra_index];
-
- ++da_index;
- }
- }
-
- ++ra_index;
- }
- }
-
- if (op != EW_MUL) {
- /*
- * Process the remaining elements on the left- or right-hand side. One or
- * the other, or neither, of the following loops may execute, but not
- * both.
- *
- * If we are doing multiplication this is unnecessary as all remaining
- * operations will produce a zero value.
- */
-
- while (la_index < la_row_max) {
- /*
- * Process the remaining elements on the left-hand side.
- */
-
- printf("Marker 1\n");
-
- tmp_result = ew_op_switch<op, DType, DType>(la[la_index], typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0);
-
- printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
-
- if (tmp_result != 0) {
- da[da_index] = tmp_result;
- YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
-
- ++da_index;
- }
-
- ++la_index;
- }
-
- while (ra_index < ra_row_max) {
- /*
- * Process the remaining elements on the right-hand side.
- */
-
- printf("Marker 2\n");
-
- tmp_result = ew_op_switch<op, DType, DType>(typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0, ra[ra_index]);
-
- printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(right)[ra_index]);
-
- if (tmp_result != 0) {
- da[da_index] = tmp_result;
- YALE_IJ(dest)[da_index] = YALE_IJ(right)[ra_index];
-
- ++da_index;
- }
-
- ++ra_index;
- }
- }
-
- // Advance the row indices.
- la_index = la_row_max;
- ra_index = ra_row_max;
-
- printf("End of row %d\n\n", row_index);
- }
-
- // Set the last row's right bound.
- YALE_IA(dest)[row_index] = da_index + a_index_offset;
-
- printf("Right bound of row %d in destination: %d\n", row_index - 1, da_index + a_index_offset);
-
- // Set the number of non-diagonal non-zero entries in the destination matrix.
- dest->ndnz = da_index;
-
- printf("Number of non-diagonal non-zero entries: %ld\n\n", (unsigned long)(dest->ndnz));
-
- // Set the capacity of the destination matrix.
- dest->capacity = dest->shape[0] + dest->ndnz + 1;
-
- // Resize the destination matrix.
- dest->a = realloc(dest->a, sizeof(DType) * dest->capacity);
- dest->ija = realloc(dest->ija, sizeof(IType) * dest->capacity);
-
- return dest;
- }
-
  /////////////
  // Utility //
  /////////////
@@ -923,6 +706,36 @@ int binary_search(YALE_STORAGE* s, IType left, IType right, IType key) {
  }


+ /*
+ * Resize yale storage vectors A and IJA, copying values.
+ */
+ static void vector_grow(YALE_STORAGE* s) {
+ size_t new_capacity = s->capacity * GROWTH_CONSTANT;
+ size_t max_capacity = max_size(s);
+
+ if (new_capacity > max_capacity) new_capacity = max_capacity;
+
+ void* new_ija = ALLOC_N(char, ITYPE_SIZES[s->itype] * new_capacity);
+ NM_CHECK_ALLOC(new_ija);
+
+ void* new_a = ALLOC_N(char, DTYPE_SIZES[s->dtype] * new_capacity);
+ NM_CHECK_ALLOC(new_a);
+
+ void* old_ija = s->ija;
+ void* old_a = s->a;
+
+ memcpy(new_ija, old_ija, s->capacity * ITYPE_SIZES[s->itype]);
+ memcpy(new_a, old_a, s->capacity * DTYPE_SIZES[s->dtype]);
+
+ s->capacity = new_capacity;
+
+ xfree(old_ija);
+ xfree(old_a);
+
+ s->ija = new_ija;
+ s->a = new_a;
+ }
+

  /*
  * Resize yale storage vectors A and IJA in preparation for an insertion.
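Note: vector_grow reallocates the paired A and IJA vectors together, multiplying capacity by GROWTH_CONSTANT and clamping at max_size(s). A self-contained sketch of the same grow-and-copy strategy for a single buffer (plain malloc/free here; the real code uses Ruby's ALLOC_N/xfree):

    #include <cstdlib>
    #include <cstring>

    // Grow a heap buffer geometrically, capped at max_capacity, copying the
    // old contents across. Returns the new buffer and updates *capacity.
    static void* grow_buffer(void* old, size_t elem_size,
                             size_t* capacity, size_t max_capacity, double growth) {
      size_t new_capacity = (size_t)(*capacity * growth);
      if (new_capacity > max_capacity) new_capacity = max_capacity;
      void* fresh = std::malloc(elem_size * new_capacity);
      std::memcpy(fresh, old, elem_size * *capacity); // copy only the old extent
      std::free(old);
      *capacity = new_capacity;
      return fresh;
    }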
@@ -979,14 +792,12 @@ static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t po

  s->capacity = new_capacity;

- free(s->ija);
- free(s->a);
+ xfree(s->ija);
+ xfree(s->a);

  s->ija = reinterpret_cast<void*>(new_ija);
  s->a = reinterpret_cast<void*>(new_a);

- fprintf(stderr, "resize\n");
-
  return 'i';
  }

@@ -1145,6 +956,8 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
  YALE_STORAGE* lhs = ALLOC( YALE_STORAGE );
  lhs->dim = rhs->dim;
  lhs->shape = ALLOC_N( size_t, lhs->dim );
+ lhs->offset = ALLOC_N( size_t, lhs->dim );
+ memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
  memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
  lhs->itype = rhs->itype;
  lhs->capacity = new_capacity;
@@ -1153,6 +966,7 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne

  lhs->ija = ALLOC_N( IType, lhs->capacity );
  lhs->a = ALLOC_N( char, DTYPE_SIZES[new_dtype] * lhs->capacity );
+ lhs->src = lhs;

  // Now copy the contents -- but only within the boundaries set by the size. Leave
  // the rest uninitialized.
@@ -1176,9 +990,7 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
  IType* ijl;
  if (left->itype == result_itype) ijl = reinterpret_cast<IType*>(left->ija);
  else { // make a temporary copy of the IJA vector for L with the correct itype
- std::cerr << "changing left itype from " << static_cast<uint8_t>(left->itype) << " to " << static_cast<int8_t>(result_itype) << std::endl;
  size_t length = nm_yale_storage_get_size(left);
- std::cerr << "length = " << length << std::endl;
  ijl = ALLOCA_N(IType, length);
  copy_recast_itype_vector(reinterpret_cast<void*>(left->ija), left->itype, reinterpret_cast<void*>(ijl), result_itype, length);
  }
@@ -1186,9 +998,7 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
  IType* ijr;
  if (right->itype == result_itype) ijr = reinterpret_cast<IType*>(right->ija);
  else { // make a temporary copy of the IJA vector for R with the correct itype
- std::cerr << "changing right itype from " << static_cast<uint8_t>(right->itype) << " to " << static_cast<int8_t>(result_itype) << std::endl;
  size_t length = nm_yale_storage_get_size(right);
- std::cerr << "length = " << length << std::endl;
  ijr = ALLOCA_N(IType, length);
  copy_recast_itype_vector(reinterpret_cast<void*>(right->ija), right->itype, reinterpret_cast<void*>(ijr), result_itype, length);
  }
@@ -1200,7 +1010,7 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu

  // Create result storage.
  YALE_STORAGE* result = nm_yale_storage_create(left->dtype, resulting_shape, 2, result_ndnz, result_itype);
- init<DType,IType>(result);
+ init<DType,IType>(result, NULL);
  IType* ija = reinterpret_cast<IType*>(result->ija);

  // Symbolic multiplication step (build the structure)
@@ -1221,28 +1031,395 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
  }


+ /*
+ * Get the sum of offsets from the original matrix (for sliced iteration).
+ */
+ static std::array<size_t,2> get_offsets(YALE_STORAGE* x) {
+ std::array<size_t, 2> offsets{ {0,0} };
+ while (x != x->src) {
+ offsets[0] += x->offset[0];
+ offsets[1] += x->offset[1];
+ x = reinterpret_cast<YALE_STORAGE*>(x->src);
+ }
+ return offsets;
+ }
+
+
+ static VALUE obj_at(YALE_STORAGE* s, size_t k) {
+ if (s->dtype == nm::RUBYOBJ) return reinterpret_cast<VALUE*>(s->a)[k];
+ else return rubyobj_from_cval(reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + k * DTYPE_SIZES[s->dtype]), s->dtype).rval;
+ }
+
+
+
+ template <typename IType>
+ class IJAManager {
+ protected:
+ bool needs_free;
+
+ public:
+ IType* ija;
+
+ IJAManager(YALE_STORAGE* s, itype_t temp_itype) : needs_free(false), ija(reinterpret_cast<IType*>(s->ija)) {
+ if (s->itype != temp_itype) {
+ size_t len = nm_yale_storage_get_size(s);
+ needs_free = true;
+ ija = ALLOC_N(IType, len);
+ copy_recast_itype_vector(s->ija, s->itype, reinterpret_cast<void*>(ija), temp_itype, len);
+ }
+ }
+
+ ~IJAManager() {
+ if (needs_free) xfree(ija);
+ }
+ };
+
+
+ template <typename IType>
+ class RowIterator {
+ protected:
+ YALE_STORAGE* s;
+ IType* ija;
+ void* a;
+ IType i, k, k_end;
+ size_t j_offset, j_shape;
+ bool diag, End;
+ VALUE init;
+ public:
+ RowIterator(YALE_STORAGE* s_, IType* ija_, IType i_, size_t j_shape_, size_t j_offset_ = 0)
+ : s(s_),
+ ija(ija_),
+ a(s->a),
+ i(i_),
+ k(ija[i]),
+ k_end(ija[i+1]),
+ j_offset(j_offset_),
+ j_shape(j_shape_),
+ diag(row_has_no_nd() || diag_is_first()),
+ End(false),
+ init(default_value(s))
+ { }
+
+ RowIterator(YALE_STORAGE* s_, IType i_, size_t j_shape_, size_t j_offset_ = 0)
+ : s(s_),
+ ija(reinterpret_cast<IType*>(s->ija)),
+ a(s->a),
+ i(i_),
+ k(ija[i]),
+ k_end(ija[i+1]),
+ j_offset(j_offset_),
+ j_shape(j_shape_),
+ diag(row_has_no_nd() || diag_is_first()),
+ End(false),
+ init(default_value(s))
+ { }
+
+ RowIterator(const RowIterator& rhs) : s(rhs.s), ija(rhs.ija), a(s->a), i(rhs.i), k(rhs.k), k_end(rhs.k_end), j_offset(rhs.j_offset), j_shape(rhs.j_shape), diag(rhs.diag), End(rhs.End), init(rhs.init) { }
+
+ VALUE obj() const {
+ return diag ? obj_at(s, i) : obj_at(s, k);
+ }
+
+ template <typename T>
+ T cobj() const {
+ if (typeid(T) == typeid(RubyObject)) return obj();
+ return diag ? reinterpret_cast<T*>(s->a)[i] : reinterpret_cast<T*>(s->a)[k];
+ }
+
+ inline IType proper_j() const {
+ //if (!diag && k >= s->capacity) {
+ // std::cerr << "proper_j(): Warning: (nondiag) k exceeded capacity at row " << int(i) << ": k=" << int(k) << ", cap=" << s->capacity << std::endl;
+ // throw;
+ //}
+ return diag ? i : ija[k];
+ }
+
+ inline IType offset_j() const {
+ return proper_j() - j_offset;
+ }
+
+ /* Returns true if an additional value is inserted, false if it goes on the diagonal */
+ bool insert(IType j, VALUE v) {
+ if (j == i) { // insert regardless on diagonal
+ reinterpret_cast<VALUE*>(a)[j] = v;
+ return false;
+
+ } else {
+ if (rb_funcall(v, rb_intern("!="), 1, init) == Qtrue) {
+ if (k >= s->capacity) {
+ vector_grow(s);
+ ija = reinterpret_cast<IType*>(s->ija);
+ a = s->a;
+ }
+ reinterpret_cast<VALUE*>(a)[k] = v;
+ ija[k] = j;
+ k++;
+ return true;
+ }
+ return false;
+ }
+ }
+
+ void update_row_end() {
+ ija[i+1] = k;
+ k_end = k;
+ }
+
+ /* Past the j_shape? */
+ inline bool end() const {
+ if (End) return true;
+ //if (diag) return i - j_offset >= j_shape;
+ //else return k >= s->capacity || ija[k] - j_offset >= j_shape;
+ return (diag ? i : ija[k]) - j_offset >= j_shape;
+ }
+
+ inline bool row_has_no_nd() const { return ija[i] == k_end; /* k_start == k_end */ }
+ inline bool diag_is_first() const { return i < ija[ija[i]]; }
+ inline bool diag_is_last() const { return i > ija[k_end-1]; } // only works if !row_has_no_nd()
+ inline bool k_is_last_nd() const { return k == k_end-1; }
+ inline bool k_is_last() const { return k_is_last_nd() && !diag_is_last(); }
+ inline bool diag_is_ahead() const { return i > ija[k]; }
+ inline bool row_has_diag() const { return i < s->shape[1]; }
+ inline bool diag_is_next() const { // assumes we've already tested for diag, row_has_no_nd(), diag_is_first()
+ if (i == ija[k]+1) return true; // definite next
+ else if (k+1 < k_end && i >= ija[k+1]+1) return false; // at least one item before it
+ else return true;
+ }
+
+ RowIterator<IType>& operator++() {
+ if (diag) { // we're at the diagonal
+ if (row_has_no_nd() || diag_is_last()) End = true; // and there are no non-diagonals (or none still to visit)
+ diag = false;
+ } else if (!row_has_diag()) { // row has no diagonal entries
+ if (row_has_no_nd() || k_is_last_nd()) End = true; // row is totally empty, or we're at last entry
+ else k++; // still entries to visit
+ // } else if (row_has_no_nd()) { // in this case we started at diag, so don't check it
+ } else { // not at diag but it exists somewhere in the row, and row has at least one nd entry
+ if (diag_is_ahead()) { // diag is ahead
+ if (k_is_last_nd()) diag = true; // diag is next and last
+ else if (diag_is_next()) { // diag is next and not last
+ diag = true;
+ k++;
+ } else k++; // diag is not next
+ } else { // diag is past
+ if (k_is_last_nd()) End = true; // and we're at the end
+ else k++; // and we're not at the end
+ }
+ }
+
+ //if (k >= s->capacity)
+ // std::cerr << "operator++: Warning: k has exceeded capacity for row " << int(i) << "; k=" << int(k) << ", cap=" << s->capacity << std::endl;
+
+ return *this;
+ }
+
+
+ RowIterator<IType> operator++(int unused) {
+ RowIterator<IType> x(*this);
+ ++(*this);
+ return x;
+ }
+ };
+
+
+ template <typename IType>
+ static VALUE map_stored(VALUE self) {
+
+ YALE_STORAGE* s = NM_STORAGE_YALE(self);
+
+ size_t* shape = ALLOC_N(size_t, 2);
+ shape[0] = s->shape[0];
+ shape[1] = s->shape[1];
+
+ std::array<size_t,2> s_offsets = get_offsets(s);
+
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_yale_enumerator_length);
+ VALUE init = rb_yield(default_value(s));
+
+ YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, s->capacity, NM_ITYPE(self));
+ nm_yale_storage_init(r, &init);
+
+ for (IType ri = 0; ri < shape[0]; ++ri) {
+ RowIterator<IType> sit(s, ri + s_offsets[0], shape[1], s_offsets[1]);
+ RowIterator<IType> rit(r, ri, shape[1]);
+
+ while (!sit.end()) {
+ VALUE rv = rb_yield(sit.obj());
+ VALUE rj = sit.offset_j();
+ rit.insert(rj, rv);
+ ++sit;
+ }
+ // Update the row end information.
+ rit.update_row_end();
+ }
+
+ NMATRIX* m = nm_create(nm::YALE_STORE, reinterpret_cast<STORAGE*>(r));
+ return Data_Wrap_Struct(CLASS_OF(self), nm_yale_storage_mark, nm_delete, m);
+ }
+
+
+ /*
+ * eqeq function for slicing and different defaults.
+ */
+ template <typename LDType, typename RDType, typename IType>
+ static bool eqeq_different_defaults(const YALE_STORAGE* s, const LDType& s_init, const YALE_STORAGE* t, const RDType& t_init) {
+
+ std::array<size_t,2> s_offsets = get_offsets(const_cast<YALE_STORAGE*>(s)),
+ t_offsets = get_offsets(const_cast<YALE_STORAGE*>(t));
+
+ for (IType ri = 0; ri < s->shape[0]; ++ri) {
+ RowIterator<IType> sit(const_cast<YALE_STORAGE*>(s), reinterpret_cast<IType*>(s->ija), ri + s_offsets[0], s->shape[1], s_offsets[1]);
+ RowIterator<IType> tit(const_cast<YALE_STORAGE*>(t), reinterpret_cast<IType*>(t->ija), ri + t_offsets[0], s->shape[1], t_offsets[1]);
+
+ while (!sit.end() || !tit.end()) {
+
+ // Perform the computation. Use a default value if the matrix doesn't have some value stored.
+ if (tit.end() || (!sit.end() && sit.offset_j() < tit.offset_j())) {
+ if (sit.template cobj<LDType>() != t_init) return false;
+ ++sit;
+
+ } else if (sit.end() || (!tit.end() && sit.offset_j() > tit.offset_j())) {
+ if (s_init != tit.template cobj<RDType>()) return false;
+ ++tit;
+
+ } else { // same index
+ if (sit.template cobj<LDType>() != tit.template cobj<RDType>()) return false;
+ ++sit;
+ ++tit;
+ }
+ }
+ }
+ return true;
+ }
+
+
+ template <typename IType>
+ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t itype) {
+
+ YALE_STORAGE *s = NM_STORAGE_YALE(left),
+ *t = NM_STORAGE_YALE(right);
+
+ size_t* shape = ALLOC_N(size_t, 2);
+ shape[0] = s->shape[0];
+ shape[1] = s->shape[1];
+
+ std::array<size_t,2> s_offsets = get_offsets(s),
+ t_offsets = get_offsets(t);
+
+ VALUE s_init = default_value(s),
+ t_init = default_value(t);
+
+ RETURN_SIZED_ENUMERATOR(left, 0, 0, 0);
+
+ if (init == Qnil)
+ init = rb_yield_values(2, s_init, t_init);
+
+ YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, NM_MAX(s->capacity, t->capacity), itype);
+ nm_yale_storage_init(r, &init);
+
+ IJAManager<IType> sm(s, itype),
+ tm(t, itype);
+
+ for (IType ri = 0; ri < shape[0]; ++ri) {
+ RowIterator<IType> sit(s, sm.ija, ri + s_offsets[0], shape[1], s_offsets[1]);
+ RowIterator<IType> tit(t, tm.ija, ri + t_offsets[0], shape[1], t_offsets[1]);
+
+ RowIterator<IType> rit(r, reinterpret_cast<IType*>(r->ija), ri, shape[1]);
+ while (!rit.end() && (!sit.end() || !tit.end())) {
+ VALUE rv;
+ IType rj;
+
+ // Perform the computation. Use a default value if the matrix doesn't have some value stored.
+ if (tit.end() || (!sit.end() && sit.offset_j() < tit.offset_j())) {
+ rv = rb_yield_values(2, sit.obj(), t_init);
+ rj = sit.offset_j();
+ ++sit;
+
+ } else if (sit.end() || (!tit.end() && sit.offset_j() > tit.offset_j())) {
+ rv = rb_yield_values(2, s_init, tit.obj());
+ rj = tit.offset_j();
+ ++tit;
+
+ } else { // same index
+ rv = rb_yield_values(2, sit.obj(), tit.obj());
+ rj = sit.offset_j();
+ ++sit;
+ ++tit;
+ }
+
+ rit.insert(rj, rv); // handles increment (and testing for default, etc)
+
+ }
+
+ // Update the row end information.
+ rit.update_row_end();
+ }
+
+ NMATRIX* m = nm_create(nm::YALE_STORE, reinterpret_cast<STORAGE*>(r));
+ return Data_Wrap_Struct(CLASS_OF(left), nm_yale_storage_mark, nm_delete, m);
+ }
+
+
  } // end of namespace nm::yale_storage


  // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
  // the matrix's storage.
- static VALUE nm_yale_enumerator_length(VALUE nmatrix) {
+ static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
  long len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix));
  return LONG2NUM(len);
  }


+
  template <typename DType, typename IType>
- struct yale_each_stored_with_indices_helper {
- static VALUE iterate(VALUE nm) {
+ struct yale_iteration_helper {

+ static VALUE iterate_with_indices(VALUE nm) {
  YALE_STORAGE* s = NM_STORAGE_YALE(nm);
- DType* a = reinterpret_cast<DType*>(s->a);
- IType* ija = reinterpret_cast<IType*>(s->ija);
+ DType* a = reinterpret_cast<DType*>(s->a);
+ IType* ija = reinterpret_cast<IType*>(s->ija);

  // If we don't have a block, return an enumerator.
  RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);

+ // Iterate in two dimensions.
+ for (long i = 0; i < s->shape[0]; ++i) {
+ VALUE ii = LONG2NUM(i);
+
+ IType k = ija[i], k_next = ija[i+1];
+
+ for (long j = 0; j < s->shape[1]; ++j) {
+ VALUE v, jj = LONG2NUM(j);
+
+ // zero is stored in s->shape[0]
+ if (i == j) {
+ v = rubyobj_from_cval(&(a[i]), NM_DTYPE(nm)).rval;
+ } else {
+ // Walk through the row until we find the correct location.
+ while (ija[k] < j && k < k_next) ++k;
+ if (k < k_next && ija[k] == j) {
+ v = rubyobj_from_cval(&(a[k]), NM_DTYPE(nm)).rval;
+ ++k;
+ } else v = rubyobj_from_cval(&(a[s->shape[0]]), NM_DTYPE(nm)).rval;
+ }
+ rb_yield_values(3, v, ii, jj);
+ }
+ }
+
+ return nm;
+ }
+
+
+ static VALUE iterate_stored_with_indices(VALUE nm) {
+
+ YALE_STORAGE* s = NM_STORAGE_YALE(nm);
+ DType* a = reinterpret_cast<DType*>(s->a);
+ IType* ija = reinterpret_cast<IType*>(s->ija);
+
+ // If we don't have a block, return an enumerator.
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
+
  // Iterate along diagonal
  for (size_t k = 0; k < s->shape[0]; ++k) {
  VALUE ii = LONG2NUM(k),
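Note: map_merged_stored drives two RowIterators in lockstep, acting at each step on whichever side has the smaller column index and substituting the other matrix's default when only one side stores a value. A self-contained model of that two-pointer merge over one row (plain vectors, not the nmatrix classes):

    #include <vector>
    #include <cstddef>

    // Merge two sorted sparse rows (column indices + values), applying op to
    // aligned pairs and using each side's default for missing columns.
    template <typename T, typename Op>
    void merge_rows(const std::vector<size_t>& aj, const std::vector<T>& av, T adef,
                    const std::vector<size_t>& bj, const std::vector<T>& bv, T bdef,
                    Op op, std::vector<size_t>& rj, std::vector<T>& rv) {
      size_t p = 0, q = 0;
      while (p < aj.size() || q < bj.size()) {
        if (q == bj.size() || (p < aj.size() && aj[p] < bj[q])) {
          rj.push_back(aj[p]); rv.push_back(op(av[p], bdef)); ++p;        // left only
        } else if (p == aj.size() || bj[q] < aj[p]) {
          rj.push_back(bj[q]); rv.push_back(op(adef, bv[q])); ++q;        // right only
        } else {
          rj.push_back(aj[p]); rv.push_back(op(av[p], bv[q])); ++p; ++q;  // both stored
        }
      }
    }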
@@ -1263,7 +1440,7 @@ struct yale_each_stored_with_indices_helper {
  jj = LONG2NUM(j);

  VALUE v = rubyobj_from_cval(&(a[p]), NM_DTYPE(nm)).rval;
- rb_yield_values(3, v, ii, jj );
+ rb_yield_values(3, v, ii, jj);
  }
  }

@@ -1273,9 +1450,8 @@ struct yale_each_stored_with_indices_helper {


  template <typename IType>
- struct yale_each_stored_with_indices_helper<RubyObject, IType> {
- static VALUE iterate(VALUE nm) {
-
+ struct yale_iteration_helper<RubyObject, IType> {
+ static VALUE iterate_with_indices(VALUE nm) {
  YALE_STORAGE* s = NM_STORAGE_YALE(nm);
  RubyObject* a = reinterpret_cast<RubyObject*>(s->a);
  IType* ija = reinterpret_cast<IType*>(s->ija);
@@ -1283,6 +1459,42 @@ struct yale_each_stored_with_indices_helper<RubyObject, IType> {
  // If we don't have a block, return an enumerator.
  RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);

+ // Iterate in two dimensions.
+ for (long i = 0; i < s->shape[0]; ++i) {
+ VALUE ii = LONG2NUM(i);
+
+ IType k = ija[i], k_next = ija[i+1];
+
+ for (long j = 0; j < s->shape[1]; ++j) {
+ VALUE v, jj = LONG2NUM(j);
+
+ // zero is stored in s->shape[0]
+ if (i == j) {
+ v = a[i].rval;
+ } else {
+ // Walk through the row until we find the correct location.
+ while (ija[k] < j && k < k_next) ++k;
+ if (k < k_next && ija[k] == j) {
+ v = a[k].rval;
+ ++k;
+ } else v = a[s->shape[0]].rval;
+ }
+ rb_yield_values(3, v, ii, jj);
+ }
+ }
+
+ return nm;
+ }
+
+ static VALUE iterate_stored_with_indices(VALUE nm) {
+
+ YALE_STORAGE* s = NM_STORAGE_YALE(nm);
+ RubyObject* a = reinterpret_cast<RubyObject*>(s->a);
+ IType* ija = reinterpret_cast<IType*>(s->ija);
+
+ // If we don't have a block, return an enumerator.
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
+
  // Iterate along diagonal
  for (size_t k = 0; k < s->shape[0]; ++k) {
  VALUE ii = LONG2NUM(k),
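Note: the new iterate_with_indices visits every (i, j) cell: it keeps a cursor into the row's stored column indices, advances it past smaller columns, and yields the stored value on a hit or the default slot (kept at a[shape[0]]) on a miss. A standalone sketch of that walk, simplified to ignore Yale's separate diagonal storage:

    #include <cstdio>
    #include <cstddef>

    // Dense walk over one sparse row: ja[k..k_next) are the sorted stored
    // column indices for this row, a[] the stored values, dflt the default.
    void visit_row(const size_t* ja, size_t k, size_t k_next,
                   const double* a, double dflt, size_t ncols) {
      for (size_t j = 0; j < ncols; ++j) {
        double v;
        while (k < k_next && ja[k] < j) ++k;      // skip smaller stored columns
        if (k < k_next && ja[k] == j) v = a[k++]; // stored entry
        else v = dflt;                            // implicit default
        std::printf("col %zu -> %f\n", j, v);
      }
    }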
@@ -1292,8 +1504,8 @@ struct yale_each_stored_with_indices_helper<RubyObject, IType> {

  // Iterate through non-diagonal elements, row by row
  for (long i = 0; i < s->shape[0]; ++i) {
- long p = static_cast<long>( ija[i] ),
- next_p = static_cast<long>( ija[i+1] );
+ IType p = ija[i],
+ next_p = ija[i+1];

  for (; p < next_p; ++p) {
  long j = static_cast<long>(ija[p]);
1315
1527
  */
1316
1528
  template <typename DType, typename IType>
1317
1529
  static VALUE yale_each_stored_with_indices(VALUE nm) {
1318
- return yale_each_stored_with_indices_helper<DType, IType>::iterate(nm);
1530
+ return yale_iteration_helper<DType, IType>::iterate_stored_with_indices(nm);
1531
+ }
1532
+
1533
+ template <typename DType, typename IType>
1534
+ static VALUE yale_each_with_indices(VALUE nm) {
1535
+ return yale_iteration_helper<DType, IType>::iterate_with_indices(nm);
1319
1536
  }
1320
1537
 
1321
1538
 
@@ -1345,7 +1562,6 @@ void nm_init_yale_functions() {
  rb_define_method(cNMatrix_YaleFunctions, "yale_lu", (METHOD)nm_lu, 0);

  rb_define_method(cNMatrix_YaleFunctions, "yale_nd_row", (METHOD)nm_nd_row, -1);
- rb_define_method(cNMatrix_YaleFunctions, "yale_vector_insert", (METHOD)nm_vector_insert, -1);

  rb_define_const(cNMatrix_YaleFunctions, "YALE_GROWTH_CONSTANT", rb_float_new(nm::yale_storage::GROWTH_CONSTANT));
  }
@@ -1356,7 +1572,18 @@ void nm_init_yale_functions() {
  /////////////////
 
 
+ /* C interface for NMatrix#each_with_indices (Yale) */
+ VALUE nm_yale_each_with_indices(VALUE nmatrix) {
+ nm::dtype_t d = NM_DTYPE(nmatrix);
+ nm::itype_t i = NM_ITYPE(nmatrix);
+
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_each_with_indices, VALUE, VALUE)
+
+ return ttable[d][i](nmatrix);
+ }
+
 
+ /* C interface for NMatrix#each_stored_with_indices (Yale) */
  VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) {
  nm::dtype_t d = NM_DTYPE(nmatrix);
  nm::itype_t i = NM_ITYPE(nmatrix);
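
The NAMED_LI_DTYPE_TEMPLATE_TABLE macro family expands to a static two-dimensional array of function pointers indexed by dtype and itype, so the runtime pair (d, i) picks the right template instantiation in one lookup instead of a nested switch. A stripped-down analogue of the pattern -- illustrative only, with toy enums; the real macros enumerate every dtype/itype combination:

    // Two runtime-enumerated axes standing in for nm::dtype_t and nm::itype_t.
    enum dtype_t { INT32, FLOAT64, NUM_DTYPES };
    enum itype_t { U8, U16, NUM_ITYPES };

    template <typename DType, typename IType>
    int iterate(void* m) { return 0; /* real code would walk the matrix */ }

    typedef int (*iter_fn)(void*);

    // The macro writes out a table like this, one entry per instantiation.
    static iter_fn ttable[NUM_DTYPES][NUM_ITYPES] = {
      { iterate<int,    unsigned char>, iterate<int,    unsigned short> },
      { iterate<double, unsigned char>, iterate<double, unsigned short> },
    };

    int dispatch(dtype_t d, itype_t i, void* m) {
      return ttable[d][i](m);  // one indexed call, no switch
    }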
@@ -1367,6 +1594,7 @@ VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) {
  }
 
 
+
  /*
  * C accessor for inserting some value in a matrix (or replacing an existing cell).
  */
@@ -1422,10 +1650,9 @@ void* nm_yale_storage_ref(STORAGE* storage, SLICE* slice) {
  return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
  }
 
+
  /*
  * C accessor for determining whether two YALE_STORAGE objects have the same contents.
- *
- * FIXME: Is this for element-wise or whole-matrix equality?
  */
  bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right) {
  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::eqeq, bool, const YALE_STORAGE* left, const YALE_STORAGE* right);
@@ -1435,10 +1662,11 @@ bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right) {
  return ttable[casted_left->dtype][right->dtype][casted_left->itype](casted_left, (const YALE_STORAGE*)right);
  }
 
+
  /*
  * Copy constructor for changing dtypes. (C accessor)
  */
- STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
+ STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::cast_copy, YALE_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t new_dtype);
 
  const YALE_STORAGE* casted_rhs = reinterpret_cast<const YALE_STORAGE*>(rhs);
@@ -1446,6 +1674,7 @@ STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
  return (STORAGE*)ttable[new_dtype][casted_rhs->dtype][casted_rhs->itype](casted_rhs, new_dtype);
  }
 
+
  /*
  * Returns size of Yale storage as a size_t (no matter what the itype is). (C accessor)
  */
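
The otherwise-unused void* parameter added to nm_yale_storage_cast_copy (dummy) appears to exist so that dense, list, and Yale cast-copy routines share one signature and can sit in the same dispatch table; list storage actually uses the slot for a default value (compare nm_list_storage_cast_copy's new signature in list.h, elsewhere in this diff). A sketch of the idea with hypothetical names:

    // Hypothetical: one function-pointer type for all three storage backends.
    // The void* carries a default value where the backend wants one (list)
    // and is ignored everywhere else (dense, yale).
    struct STORAGE;
    enum dtype_t { INT32, FLOAT64 };

    typedef STORAGE* (*cast_copy_fn)(const STORAGE*, dtype_t, void*);

    STORAGE* dense_cast_copy(const STORAGE*, dtype_t, void*)      { return nullptr; }
    STORAGE* list_cast_copy (const STORAGE*, dtype_t, void* dflt) { return nullptr; }
    STORAGE* yale_cast_copy (const STORAGE*, dtype_t, void*)      { return nullptr; }

    static cast_copy_fn cast_copy_table[3] = {
      dense_cast_copy, list_cast_copy, yale_cast_copy
    };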
@@ -1455,6 +1684,32 @@ size_t nm_yale_storage_get_size(const YALE_STORAGE* storage) {
  return ttable[storage->itype](storage);
  }
 
+
+ /*
+ * Return a void pointer to the matrix's default value entry.
+ */
+ static void* default_value_ptr(const YALE_STORAGE* s) {
+ return reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + (s->shape[0] * DTYPE_SIZES[s->dtype]));
+ }
+
+
+ /*
+ * Return the matrix's default value as a Ruby VALUE.
+ */
+ static VALUE default_value(const YALE_STORAGE* s) {
+ if (s->dtype == nm::RUBYOBJ) return *reinterpret_cast<VALUE*>(default_value_ptr(s));
+ else return rubyobj_from_cval(default_value_ptr(s), s->dtype).rval;
+ }
+
+
+ /*
+ * Check to see if a default value is some form of zero. Easy for non-Ruby object matrices, which should always be 0.
+ */
+ static bool default_value_is_numeric_zero(const YALE_STORAGE* s) {
+ return rb_funcall(default_value(s), rb_intern("=="), 1, INT2FIX(0)) == Qtrue;
+ }
+
+
  /*
  * C accessor for allocating a yale storage object for cast-copying. Copies the IJA vector, does not copy the A vector.
  */
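
default_value_ptr encodes the layout fact used throughout this file: the A vector stores the shape[0] diagonal entries first and the default value immediately after them, so the default lives at byte offset shape[0] * DTYPE_SIZES[dtype] from a. A worked example of the arithmetic, with values assumed purely for illustration:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
      // Assume a 4x4 matrix of int64_t. The A vector begins:
      //   a[0..3] -> diagonal entries, a[4] -> default ("zero"), a[5..] -> off-diagonals.
      int64_t a[8] = {10, 11, 12, 13, /* default */ 0, 7, 8, 9};
      size_t shape0 = 4;

      // Same computation as default_value_ptr: base + shape[0] * DTYPE_SIZES[dtype].
      void* dflt = reinterpret_cast<char*>(a) + shape0 * sizeof(int64_t);
      assert(*reinterpret_cast<int64_t*>(dflt) == 0);
      return 0;
    }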
@@ -1476,8 +1731,8 @@ STORAGE* nm_yale_storage_copy_transposed(const STORAGE* rhs_base) {
 
  size_t size = nm_yale_storage_get_size(rhs);
 
- YALE_STORAGE* lhs = nm_yale_storage_create(rhs->dtype, shape, 2, size, nm::UINT8);
- nm_yale_storage_init(lhs);
+ YALE_STORAGE* lhs = nm_yale_storage_create(rhs->dtype, shape, 2, size, rhs->itype);
+ nm_yale_storage_init(lhs, default_value_ptr(rhs));
 
  NAMED_LI_DTYPE_TEMPLATE_TABLE(transp, nm::math::transpose_yale, void, const size_t n, const size_t m, const void* ia_, const void* ja_, const void* a_, const bool diaga, void* ib_, void* jb_, void* b_, const bool move);
 
@@ -1498,6 +1753,11 @@ STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, siz
  YALE_STORAGE* left = reinterpret_cast<YALE_STORAGE*>(casted_storage.left);
  YALE_STORAGE* right = reinterpret_cast<YALE_STORAGE*>(casted_storage.right);
 
+ if (!default_value_is_numeric_zero(left) || !default_value_is_numeric_zero(right)) {
+ rb_raise(rb_eNotImpError, "matrix default value must be some form of zero (not false or nil) for multiplication");
+ return NULL;
+ }
+
  // Determine the itype for the matrix that will be returned.
  nm::itype_t itype = nm_yale_storage_itype_by_shape(resulting_shape),
  max_itype = NM_MAX_ITYPE(left->itype, right->itype);
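
The new guard exists because the Yale multiply kernel only visits stored entries. If the left matrix's default value is some d != 0, every unstored position still contributes to the true product, and a stored-entries-only kernel would silently drop those terms. Schematically (general sparse-matrix reasoning, considering only the left default for simplicity):

    C_{ij} = \sum_k A_{ik} B_{kj}
           = \sum_{k \in \mathrm{stored}(i)} A_{ik} B_{kj}
           + \sum_{k \notin \mathrm{stored}(i)} d \, B_{kj}

The second sum vanishes exactly when d = 0, which is what default_value_is_numeric_zero checks; anything else (including false or nil defaults on RUBYOBJ matrices) raises NotImplementedError rather than returning a wrong answer.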
@@ -1506,70 +1766,6 @@ STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, siz
  return ttable[left->dtype][itype](casted_storage, resulting_shape, vector, itype);
  }
 
- /*
- * Documentation goes here.
- */
- STORAGE* nm_yale_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar) {
- OP_ITYPE_DTYPE_TEMPLATE_TABLE(nm::yale_storage::ew_op, YALE_STORAGE*, const YALE_STORAGE*, const YALE_STORAGE*, nm::dtype_t);
-
- YALE_STORAGE* new_l = NULL, * new_r = NULL;
- YALE_STORAGE* result;
-
- const YALE_STORAGE* casted_l, * casted_r;
-
- nm::dtype_t new_dtype;
-
- if (left->dtype != right->dtype) {
-
- new_dtype = Upcast[left->dtype][right->dtype];
-
- if (left->dtype != new_dtype) {
- new_l = reinterpret_cast<YALE_STORAGE*>(nm_yale_storage_cast_copy( left, new_dtype));
- }
-
- if (right->dtype != new_dtype) {
- new_r = reinterpret_cast<YALE_STORAGE*>(nm_yale_storage_cast_copy(right, new_dtype));
- }
-
- if (static_cast<uint8_t>(op) < nm::NUM_NONCOMP_EWOPS) {
- result = ttable[op][new_l->itype][new_dtype]( left->dtype == new_dtype ?
- reinterpret_cast<const YALE_STORAGE*>( left) :
- reinterpret_cast<const YALE_STORAGE*>(new_l),
-
- right->dtype == new_dtype ?
- reinterpret_cast<const YALE_STORAGE*>(right) :
- reinterpret_cast<const YALE_STORAGE*>(new_r),
-
- new_dtype);
-
- } else {
- rb_raise(rb_eNotImpError, "Elementwise comparison is not yet implemented for the Yale storage class.");
- }
-
- if (new_l != NULL) {
- nm_yale_storage_delete(new_l);
- }
-
- if (new_r != NULL) {
- nm_yale_storage_delete(new_r);
- }
-
- return result;
-
- } else {
-
- casted_l = reinterpret_cast<const YALE_STORAGE*>( left);
- casted_r = reinterpret_cast<const YALE_STORAGE*>(right);
-
- if (static_cast<uint8_t>(op) < nm::NUM_NONCOMP_EWOPS) {
-
- return ttable[op][casted_l->itype][casted_l->dtype](casted_l, casted_r, casted_l->dtype);
-
- } else {
- rb_raise(rb_eNotImpError, "Elementwise comparison is not yet implemented for the Yale storage class.");
- }
- }
- }
 
  ///////////////
  // Lifecycle //
@@ -1620,10 +1816,11 @@ YALE_STORAGE* nm_yale_storage_create(nm::dtype_t dtype, size_t* shape, size_t di
  void nm_yale_storage_delete(STORAGE* s) {
  if (s) {
  YALE_STORAGE* storage = (YALE_STORAGE*)s;
- free(storage->shape);
- free(storage->ija);
- free(storage->a);
- free(storage);
+ xfree(storage->shape);
+ xfree(storage->offset);
+ xfree(storage->ija);
+ xfree(storage->a);
+ xfree(storage);
  }
  }
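
The switch from free to xfree matters because this storage is allocated with Ruby's ALLOC/ALLOC_N macros (see the alloc() change below, which adds the offset array via ALLOC_N), and Ruby's allocator needs to see both sides of the allocation for its GC bookkeeping. A minimal pairing sketch, assuming a Ruby C-extension translation unit:

    #include <ruby.h>

    // Buffers obtained through Ruby's allocation macros must be released
    // with xfree (ruby_xfree), never plain free(), so the interpreter's
    // memory accounting stays balanced.
    static void shape_roundtrip(void) {
      size_t* shape = ALLOC_N(size_t, 2);  // ruby_xmalloc2 under the hood
      shape[0] = shape[1] = 4;
      xfree(shape);                        // matches ALLOC_N
    }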
 
@@ -1632,10 +1829,10 @@ void nm_yale_storage_delete(STORAGE* s) {
  *
  * Initializes the IJA vector of the YALE_STORAGE matrix.
  */
- void nm_yale_storage_init(YALE_STORAGE* s) {
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::init, void, YALE_STORAGE* s);
+ void nm_yale_storage_init(YALE_STORAGE* s, void* init_val) {
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::init, void, YALE_STORAGE*, void*);
 
- ttable[s->dtype][s->itype](s);
+ ttable[s->dtype][s->itype](s, init_val);
  }
 
 
@@ -1664,8 +1861,12 @@ static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::ity
  s->ndnz = 0;
  s->dtype = dtype;
  s->shape = shape;
+ s->offset = ALLOC_N(size_t, dim);
+ for (size_t i = 0; i < dim; ++i)
+ s->offset[i] = 0;
  s->dim = dim;
  s->itype = nm_yale_storage_itype_by_shape(shape);
+ s->src = reinterpret_cast<STORAGE*>(s);
 
  // See if a higher itype has been requested.
  if (static_cast<int8_t>(s->itype) < static_cast<int8_t>(min_itype))
@@ -1723,8 +1924,14 @@ static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
  if (idx == Qnil) {
  VALUE* vals = ALLOCA_N(VALUE, size);
 
- for (size_t i = 0; i < size; ++i) {
- vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+ if (NM_DTYPE(self) == nm::RUBYOBJ) {
+ for (size_t i = 0; i < size; ++i) {
+ vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
+ }
+ } else {
+ for (size_t i = 0; i < size; ++i) {
+ vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+ }
  }
  VALUE ary = rb_ary_new4(size, vals);
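
The new RUBYOBJ branch reflects a storage convention: an nm::RUBYOBJ matrix keeps raw VALUEs in its A vector, so entries can be handed to Ruby as-is, while every other dtype must be boxed through rubyobj_from_cval. The same dichotomy, factored into a hypothetical helper using this file's own types:

    // Hypothetical helper (not in the diff) capturing the branch that nm_a,
    // nm_d and nm_lu now all perform.
    static VALUE box_entry(const YALE_STORAGE* s, size_t i) {
      if (s->dtype == nm::RUBYOBJ)
        return reinterpret_cast<VALUE*>(s->a)[i];     // already a Ruby VALUE
      return rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * i,
                               s->dtype).rval;        // box the C value
    }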
 
@@ -1757,9 +1964,16 @@ static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
  if (idx == Qnil) {
  VALUE* vals = ALLOCA_N(VALUE, s->shape[0]);
 
- for (size_t i = 0; i < s->shape[0]; ++i) {
- vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+ if (NM_DTYPE(self) == nm::RUBYOBJ) {
+ for (size_t i = 0; i < s->shape[0]; ++i) {
+ vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
+ }
+ } else {
+ for (size_t i = 0; i < s->shape[0]; ++i) {
+ vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+ }
  }
+
  return rb_ary_new4(s->shape[0], vals);
  } else {
  size_t index = FIX2INT(idx);
@@ -1782,8 +1996,14 @@ static VALUE nm_lu(VALUE self) {
 
  VALUE* vals = ALLOCA_N(VALUE, size - s->shape[0] - 1);
 
- for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
- vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*(s->shape[0] + 1 + i), s->dtype).rval;
+ if (NM_DTYPE(self) == nm::RUBYOBJ) {
+ for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
+ vals[i] = reinterpret_cast<VALUE*>(s->a)[s->shape[0] + 1 + i];
+ }
+ } else {
+ for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
+ vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*(s->shape[0] + 1 + i), s->dtype).rval;
+ }
  }
 
  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
@@ -1882,20 +2102,18 @@ static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
  * yale_nd_row -> ...
  *
  * This function gets the non-diagonal contents of a Yale matrix row.
- * The first argument should be the row index. The optional second argument may be :hash or :array, but defaults
- * to :hash. If :array is given, it will only return the Hash keys (the column indices).
+ * The first argument should be the row index. The optional second argument may be :hash or :keys, but defaults
+ * to :hash. If :keys is given, it will only return the Hash keys (the column indices).
  *
  * This function is meant to accomplish its purpose as efficiently as possible. It does not check for appropriate
  * range.
- *
- * FIXME: :array doesn't make sense. This should be :keys or :values to indicate which array we want.
  */
  static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
  VALUE i_, as;
  rb_scan_args(argc, argv, "11", &i_, &as);
 
- bool array = false;
- if (as != Qnil && rb_to_id(as) != nm_rb_hash) array = true;
+ bool keys = false;
+ if (as != Qnil && rb_to_id(as) != nm_rb_hash) keys = true;
 
  size_t i = FIX2INT(i_);
 
@@ -1912,7 +2130,7 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
  //std::cerr << "diff = " << diff << "\tpos = " << pos << "\tnextpos = " << nextpos << std::endl;
 
  VALUE ret; // HERE
- if (array) {
+ if (keys) {
  ret = rb_ary_new3(diff);
 
  for (size_t idx = pos; idx < nextpos; ++idx) {
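
Since yale_nd_row is registered as a Ruby-visible method in nm_init_yale_functions above, both forms can also be exercised through the C API; a hypothetical call, assuming m holds a Yale NMatrix:

    // Fetch row 3's off-diagonal contents: first as a Hash of {column => value},
    // then just the column indices (the Hash keys).
    VALUE row_hash = rb_funcall(m, rb_intern("yale_nd_row"), 1, INT2FIX(3));
    VALUE row_keys = rb_funcall(m, rb_intern("yale_nd_row"), 2, INT2FIX(3),
                                ID2SYM(rb_intern("keys")));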
@@ -1933,7 +2151,7 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
 
  /*
  * call-seq:
- * yale_vector_insert -> Fixnum
+ * yale_vector_set(i, column_index_array, cell_contents_array, pos) -> Fixnum
  *
  * Insert at position pos an array of non-diagonal elements with column indices given. Note that the column indices and values
  * must be storage-contiguous -- that is, you can't insert them around existing elements in some row, only amid some
@@ -1949,18 +2167,18 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
  * lead to undefined behavior.
  *
  * Example:
- * m.yale_vector_insert(3, [0,3,4], [1,1,1], 15)
+ * m.yale_vector_set(3, [0,3,4], [1,1,1], 15)
  *
  * The example above inserts the values 1, 1, and 1 in columns 0, 3, and 4, assumed to be located at position 15 (which
  * corresponds to row 3).
  *
  * Example:
- * next = m.yale_vector_insert(3, [0,3,4], [1,1,1])
+ * next = m.yale_vector_set(3, [0,3,4], [1,1,1])
  *
  * This example determines that i=3 is at position 15 automatically. The value returned, next, is the position where the
  * next value(s) should be inserted.
  */
- static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) {
+ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) {
 
  // i, jv, vv are mandatory; pos is optional; thus "31"
  VALUE i_, jv, vv, pos_;
@@ -2002,4 +2220,46 @@ static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self) { //, VALUE i_,
  }
 
 
+
+
+ /*
+ * call-seq:
+ * __yale_default_value__ -> ...
+ *
+ * Get the default_value property from a Yale matrix.
+ */
+ VALUE nm_yale_default_value(VALUE self) {
+ return default_value(NM_STORAGE_YALE(self));
+ }
+
+
+ /*
+ * call-seq:
+ * __yale_map_merged_stored__(right, init) -> Enumerator
+ *
+ * A map operation on two Yale matrices which only iterates across the stored indices.
+ */
+ VALUE nm_yale_map_merged_stored(VALUE left, VALUE right, VALUE init) {
+ YALE_STORAGE *s = NM_STORAGE_YALE(left),
+ *t = NM_STORAGE_YALE(right);
+
+ ITYPE_TEMPLATE_TABLE(nm::yale_storage::map_merged_stored, VALUE, VALUE l, VALUE r, VALUE init, nm::itype_t)
+
+ nm::itype_t itype = NM_MAX_ITYPE(s->itype, t->itype);
+ return ttable[itype](left, right, init, itype);
+ }
+
+
+ /*
+ * call-seq:
+ * __yale_map_stored__ -> Enumerator
+ *
+ * A map operation on a single Yale matrix which only iterates across the stored indices.
+ */
+ VALUE nm_yale_map_stored(VALUE self) {
+ ITYPE_TEMPLATE_TABLE(nm::yale_storage::map_stored, VALUE, VALUE)
+
+ return ttable[NM_ITYPE(self)](self);
+ }
+
  } // end of extern "C" block
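
Taken together, these three entry points give the Ruby layer (lib/nmatrix/nmatrix.rb and friends, also updated in this release) the hooks behind the new each/map machinery. A hypothetical C-side exercise of the pair, assuming left and right are Yale NMatrix VALUEs bound to the call-seq names shown above; note rb_funcall ignores method visibility, so this works even if the methods are registered as protected:

    VALUE dflt = rb_funcall(left, rb_intern("__yale_default_value__"), 0);
    VALUE merged = rb_funcall(left, rb_intern("__yale_map_merged_stored__"),
                              2, right, dflt);  // init value: left's default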