RubyGems - nmatrix - Versions diffs - 0.0.5 → 0.0.6 - Mend

nmatrix 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

checksums.yaml +4 -4
data/History.txt +102 -10
data/README.rdoc +24 -32
data/Rakefile +1 -1
data/ext/nmatrix/data/complex.h +9 -0
data/ext/nmatrix/data/data.cpp +78 -4
data/ext/nmatrix/data/data.h +86 -54
data/ext/nmatrix/data/rational.h +2 -0
data/ext/nmatrix/data/ruby_object.h +38 -8
data/ext/nmatrix/extconf.rb +13 -7
data/ext/nmatrix/nmatrix.cpp +262 -139
data/ext/nmatrix/nmatrix.h +11 -4
data/ext/nmatrix/storage/common.cpp +20 -13
data/ext/nmatrix/storage/common.h +18 -12
data/ext/nmatrix/storage/dense.cpp +122 -192
data/ext/nmatrix/storage/dense.h +4 -2
data/ext/nmatrix/storage/list.cpp +467 -636
data/ext/nmatrix/storage/list.h +6 -3
data/ext/nmatrix/storage/storage.cpp +83 -46
data/ext/nmatrix/storage/storage.h +7 -7
data/ext/nmatrix/storage/yale.cpp +621 -361
data/ext/nmatrix/storage/yale.h +21 -9
data/ext/nmatrix/ttable_helper.rb +27 -31
data/ext/nmatrix/types.h +1 -1
data/ext/nmatrix/util/math.cpp +9 -10
data/ext/nmatrix/util/sl_list.cpp +1 -7
data/ext/nmatrix/util/sl_list.h +0 -118
data/lib/nmatrix/blas.rb +59 -18
data/lib/nmatrix/monkeys.rb +0 -52
data/lib/nmatrix/nmatrix.rb +136 -9
data/lib/nmatrix/nvector.rb +33 -0
data/lib/nmatrix/shortcuts.rb +95 -16
data/lib/nmatrix/version.rb +1 -1
data/lib/nmatrix/yale_functions.rb +25 -19
data/spec/blas_spec.rb +1 -19
data/spec/elementwise_spec.rb +132 -17
data/spec/lapack_spec.rb +0 -3
data/spec/nmatrix_list_spec.rb +18 -0
data/spec/nmatrix_spec.rb +44 -18
data/spec/nmatrix_yale_spec.rb +1 -3
data/spec/shortcuts_spec.rb +26 -36
data/spec/slice_spec.rb +2 -4
metadata +2 -2

data/ext/nmatrix/storage/list.h CHANGED Viewed

@@ -82,7 +82,7 @@ extern "C" {
   // Accessors //
   ///////////////
-  VALUE nm_list_each_stored_with_indices(VALUE nmatrix);
+  VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored);
   void* nm_list_storage_ref(STORAGE* s, SLICE* slice);
   void* nm_list_storage_get(STORAGE* s, SLICE* slice);
   void* nm_list_storage_insert(STORAGE* s, SLICE* slice, void* val);
@@ -98,7 +98,6 @@ extern "C" {
   // Math //
   //////////
-  STORAGE* nm_list_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar);
   STORAGE* nm_list_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
@@ -122,9 +121,13 @@ extern "C" {
   LIST_STORAGE* nm_list_storage_copy(const LIST_STORAGE* rhs);
   STORAGE*      nm_list_storage_copy_transposed(const STORAGE* rhs_base);
-  STORAGE*      nm_list_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype);
+  STORAGE*      nm_list_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void*);
   VALUE         nm_list_storage_to_hash(const LIST_STORAGE* s, const nm::dtype_t dtype);
+  // Exposed functions
+  VALUE nm_to_hash(VALUE self);
+  VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init);
+  VALUE nm_list_default_value(VALUE self);
 } // end of extern "C" block
 #endif // LIST_H

data/ext/nmatrix/storage/storage.cpp CHANGED Viewed

@@ -161,7 +161,7 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
 			for (RIType j = 0; j < rhs->shape[1]; ++j) { // Move to next dense position.
         // Fill in zeros (except for diagonal)
-        if (i == j) lhs_elements[pos] = rhs_a[i];
+        if (i == j) lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
 				else        lhs_elements[pos] = LCAST_ZERO;
 				++pos;
@@ -173,10 +173,10 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
 			for (size_t j = 0; j < rhs->shape[1]; ++j) {
         if (i == j) {
-          lhs_elements[pos] = rhs_a[i];
+          lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
         } else if (j == jj) {
-          lhs_elements[pos] = rhs_a[ija]; // Copy from rhs.
+          lhs_elements[pos] = static_cast<LDType>(rhs_a[ija]); // Copy from rhs.
           // Get next.
           ++ija;
@@ -214,14 +214,14 @@ static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* defaul
     if (!curr || (curr->key > (size_t)(last_key+1))) {
-      if (recursions == 0)  lhs[pos] = *default_val;
+      if (recursions == 0)  lhs[pos] = static_cast<LDType>(*default_val);
       else               		cast_copy_list_default<LDType,RDType>(lhs, default_val, pos, shape, dim, max_elements, recursions-1);
       ++last_key;
     } else {
-      if (recursions == 0)  lhs[pos] = *reinterpret_cast<RDType*>(curr->val);
+      if (recursions == 0)  lhs[pos] = static_cast<LDType>(*reinterpret_cast<RDType*>(curr->val));
       else                	cast_copy_list_contents<LDType,RDType>(lhs, (const LIST*)(curr->val),
                                                                                          default_val, pos, shape, dim, max_elements, recursions-1);
@@ -240,7 +240,7 @@ template <typename LDType,typename RDType>
 static void cast_copy_list_default(LDType* lhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions) {
 	for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) {
-    if (recursions == 0)    lhs[pos] = *default_val;
+    if (recursions == 0)    lhs[pos] = static_cast<LDType>(*default_val);
     else                  	cast_copy_list_default<LDType,RDType>(lhs, default_val, pos, shape, dim, max_elements, recursions-1);
   }
@@ -261,7 +261,7 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,
  * Creation of list storage from dense storage.
  */
 template <typename LDType, typename RDType>
-LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype) {
+LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {
   LDType* l_default_val = ALLOC_N(LDType, 1);
   RDType* r_default_val = ALLOCA_N(RDType, 1); // clean up when finished with this function
@@ -274,13 +274,16 @@ LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtyp
   memset(coords, 0, rhs->dim * sizeof(size_t));
   // set list default_val to 0
-  if (l_dtype == RUBYOBJ)  	*l_default_val = INT2FIX(0);
-  else    	                *l_default_val = 0;
+  if (init) *l_default_val = *reinterpret_cast<LDType*>(init);
+  else {
+    if (l_dtype == RUBYOBJ)  	*l_default_val = INT2FIX(0);
+    else    	                *l_default_val = 0;
+  }
   // need test default value for comparing to elements in dense matrix
-  if (rhs->dtype == l_dtype)  	  *r_default_val = *l_default_val;
-  else if (rhs->dtype == RUBYOBJ) *r_default_val = INT2FIX(0);
-  else  	                        *r_default_val = 0;
+  if (rhs->dtype == l_dtype || rhs->dtype != RUBYOBJ) *r_default_val = static_cast<RDType>(*l_default_val);
+  else                                                *r_default_val = rubyobj_from_cval(l_default_val, l_dtype);
   LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, l_default_val);
@@ -320,7 +323,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
   // copy default value from the zero location in the Yale matrix
   LDType* default_val = ALLOC_N(LDType, 1);
-  *default_val        = R_ZERO;
+  *default_val        = static_cast<LDType>(R_ZERO);
   LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, default_val);
@@ -353,8 +356,8 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
         // Is there a nonzero diagonal item between the previously added item and the current one?
         if (jj > i && add_diag) {
           // Allocate and copy insertion value
-          insert_val = ALLOC_N(LDType, 1);
-          *insert_val        = rhs_a[i];
+          insert_val  = ALLOC_N(LDType, 1);
+          *insert_val = static_cast<LDType>(rhs_a[i]);
           // insert the item in the list at the appropriate location
           if (last_added) 	last_added = list::insert_after(last_added, i, insert_val);
@@ -366,7 +369,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
         // now allocate and add the current item
         insert_val  = ALLOC_N(LDType, 1);
-        *insert_val = rhs_a[ija];
+        *insert_val = static_cast<LDType>(rhs_a[ija]);
         if (last_added)    	last_added = list::insert_after(last_added, jj, insert_val);
         else              	last_added = list::insert(curr_row, false, jj, insert_val);
@@ -376,8 +379,8 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
       if (add_diag) {
       	// still haven't added the diagonal.
-        insert_val = ALLOC_N(LDType, 1);
-        *insert_val        = rhs_a[i];
+        insert_val         = ALLOC_N(LDType, 1);
+        *insert_val        = static_cast<LDType>(rhs_a[i]);
         // insert the item in the list at the appropriate location
         if (last_added)    	last_added = list::insert_after(last_added, i, insert_val);
@@ -417,7 +420,7 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,
         // Create a copy of our value that we will insert in the list
         LDType* insert_value = ALLOC_N(LDType, 1);
-        *insert_value        = (LDType)(rhs[pos]);
+        *insert_value        = static_cast<LDType>(rhs[pos]);
         if (!lhs->first)    prev = list::insert(lhs, false, coords[dim-1-recursions], insert_value);
         else               	prev = list::insert_after(prev, coords[dim-1-recursions], insert_value);
@@ -454,16 +457,19 @@ namespace yale_storage { // FIXME: Move to yale.cpp
    * Creation of yale storage from dense storage.
    */
   template <typename LDType, typename RDType, typename LIType>
-  YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype) {
+  YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {
     if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");
     LIType pos = 0;
     LIType ndnz = 0;
-    RDType R_ZERO; // need zero for easier comparisons
-    if (rhs->dtype == RUBYOBJ)  R_ZERO = INT2FIX(0);
-    else                        R_ZERO = 0;
+    // We need a zero value. This should nearly always be zero, but sometimes you might want false or nil.
+    LDType    L_INIT(0);
+    if (init) {
+      if (l_dtype == RUBYOBJ) L_INIT = *reinterpret_cast<VALUE*>(init);
+      else                    L_INIT = rubyobj_from_cval(init, rhs->dtype);
+    }
+    RDType R_INIT = static_cast<RDType>(L_INIT);
     RDType* rhs_elements = reinterpret_cast<RDType*>(rhs->elements);
@@ -471,7 +477,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
     for (size_t i = rhs->shape[0]; i-- > 0;) {
       for (size_t j = rhs->shape[1]; j-- > 0;) {
         pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]);
-        if (i != j && rhs_elements[pos] != R_ZERO)	++ndnz;
+        if (i != j && rhs_elements[pos] != R_INIT)	++ndnz;
         // move forward 1 position in dense matrix elements array
       }
@@ -494,7 +500,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
     LIType* lhs_ija   = reinterpret_cast<LIType*>(lhs->ija);
     // Set the zero position in the yale matrix
-    lhs_a[shape[0]] = R_ZERO;
+    lhs_a[shape[0]]   = L_INIT;
     // Start just after the zero position.
     LIType ija = shape[0]+1;
@@ -510,11 +516,10 @@ namespace yale_storage { // FIXME: Move to yale.cpp
         pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]); // calc position with offsets
         if (i == j) { // copy to diagonal
-          lhs_a[i]  = rhs_elements[pos];
-        } else if (rhs_elements[pos] != R_ZERO) { // copy nonzero to LU
+          lhs_a[i]     = static_cast<LDType>(rhs_elements[pos]);
+        } else if (rhs_elements[pos] != R_INIT) { // copy nonzero to LU
           lhs_ija[ija] = j; // write column index
-          lhs_a[ija] = rhs_elements[pos];
+          lhs_a[ija]   = static_cast<LDType>(rhs_elements[pos]);
           ++ija;
         }
@@ -534,9 +539,12 @@ namespace yale_storage { // FIXME: Move to yale.cpp
   YALE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, nm::dtype_t l_dtype) {
     if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");
-    if ((rhs->dtype == RUBYOBJ and (*reinterpret_cast<RubyObject*>(rhs->default_val)) == RubyObject(INT2FIX(0)))
-        || strncmp(reinterpret_cast<const char*>(rhs->default_val), "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", DTYPE_SIZES[rhs->dtype]))
-      rb_raise(nm_eStorageTypeError, "list matrix must have default value of 0 to convert to yale");
+    if (rhs->dtype == RUBYOBJ) {
+      VALUE init_val = *reinterpret_cast<VALUE*>(rhs->default_val);
+      if (rb_funcall(init_val, rb_intern("!="), 1, Qnil) == Qtrue && rb_funcall(init_val, rb_intern("!="), 1, Qfalse) == Qtrue && rb_funcall(init_val, rb_intern("!="), 1, INT2FIX(0)) == Qtrue)
+        rb_raise(nm_eStorageTypeError, "list matrix of Ruby objects must have default value equal to 0, nil, or false to convert to yale");
+    } else if (strncmp(reinterpret_cast<const char*>(rhs->default_val), "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", DTYPE_SIZES[rhs->dtype]))
+      rb_raise(nm_eStorageTypeError, "list matrix of non-Ruby objects must have default value of 0 to convert to yale");
     size_t ndnz = nm_list_storage_count_nd_elements(rhs);
@@ -552,7 +560,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
       rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", (unsigned long)request_capacity, (unsigned long)(lhs->capacity));
     // Initialize the A and IJA arrays
-    init<LDType,LIType>(lhs);
+    init<LDType,LIType>(lhs, rhs->default_val);
     LIType* lhs_ija = reinterpret_cast<LIType*>(lhs->ija);
     LDType* lhs_a   = reinterpret_cast<LDType*>(lhs->a);
@@ -602,7 +610,6 @@ namespace yale_storage { // FIXME: Move to yale.cpp
 extern "C" {
   /*
    * The following functions represent stype casts -- conversions from one
    * stype to another. Each of these is the C accessor for a templated C++
@@ -610,47 +617,77 @@ extern "C" {
    */
-  STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype) {
-    NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_dense_storage, YALE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t l_dtype);
+  STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void* init) {
+    NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_dense_storage, YALE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t l_dtype, void*);
     nm::itype_t itype = nm_yale_storage_default_itype((const YALE_STORAGE*)right);
-    return (STORAGE*)ttable[l_dtype][right->dtype][itype]((const DENSE_STORAGE*)right, l_dtype);
+    if (!ttable[l_dtype][right->dtype][itype]) {
+      rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+      return NULL;
+    }
+    return (STORAGE*)ttable[l_dtype][right->dtype][itype]((const DENSE_STORAGE*)right, l_dtype, init);
   }
-  STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype) {
+  STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
     NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_list_storage, YALE_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t l_dtype);
     nm::itype_t itype = nm_yale_storage_default_itype((const YALE_STORAGE*)right);
+    if (!ttable[l_dtype][right->dtype][itype]) {
+      rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+      return NULL;
+    }
     return (STORAGE*)ttable[l_dtype][right->dtype][itype]((const LIST_STORAGE*)right, l_dtype);
   }
-  STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype) {
+  STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
     NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::create_from_list_storage, DENSE_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t l_dtype);
+    if (!ttable[l_dtype][right->dtype]) {
+      rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+      return NULL;
+    }
     return (STORAGE*)ttable[l_dtype][right->dtype]((const LIST_STORAGE*)right, l_dtype);
   }
-  STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype) {
+  STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
     NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::create_from_yale_storage, DENSE_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t l_dtype);
     const YALE_STORAGE* casted_right = reinterpret_cast<const YALE_STORAGE*>(right);
+    if (!ttable[l_dtype][right->dtype][casted_right->itype]) {
+      rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+      return NULL;
+    }
     return reinterpret_cast<STORAGE*>(ttable[l_dtype][right->dtype][casted_right->itype](casted_right, l_dtype));
   }
-  STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype) {
-    NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_dense_storage, LIST_STORAGE*, const DENSE_STORAGE*, nm::dtype_t);
+  STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void* init) {
+    NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_dense_storage, LIST_STORAGE*, const DENSE_STORAGE*, nm::dtype_t, void*);
+    if (!ttable[l_dtype][right->dtype]) {
+      rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+      return NULL;
+    }
-    return (STORAGE*)ttable[l_dtype][right->dtype]((DENSE_STORAGE*)right, l_dtype);
+    return (STORAGE*)ttable[l_dtype][right->dtype]((DENSE_STORAGE*)right, l_dtype, init);
   }
-  STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype) {
+  STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) {
     NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_yale_storage, LIST_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t l_dtype);
     const YALE_STORAGE* casted_right = reinterpret_cast<const YALE_STORAGE*>(right);
+    if (!ttable[l_dtype][right->dtype][casted_right->itype]) {
+      rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined");
+      return NULL;
+    }
     return (STORAGE*)ttable[l_dtype][right->dtype][casted_right->itype](casted_right, l_dtype);
   }

data/ext/nmatrix/storage/storage.h CHANGED Viewed

@@ -34,7 +34,7 @@
  * Standard Includes
  */
-#include <stdlib.h>
+#include <cstdlib>
 /*
  * Project Includes
@@ -86,12 +86,12 @@ extern "C" {
   // Copying and Casting //
   /////////////////////////
-  STORAGE*	  nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype);
-  STORAGE*	  nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype);
-  STORAGE*		nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype);
-  STORAGE*		nm_list_storage_from_yale(const STORAGE* right,  nm::dtype_t l_dtype);
-  STORAGE*		nm_yale_storage_from_list(const STORAGE* right,  nm::dtype_t l_dtype);
-  STORAGE*		nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype);
+  STORAGE*	  nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void*);
+  STORAGE*	  nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void*);
+  STORAGE*		nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void*);
+  STORAGE*		nm_list_storage_from_yale(const STORAGE* right,  nm::dtype_t l_dtype, void*);
+  STORAGE*		nm_yale_storage_from_list(const STORAGE* right,  nm::dtype_t l_dtype, void*);
+  STORAGE*		nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void*);
 } // end of extern "C" block

data/ext/nmatrix/storage/yale.cpp CHANGED Viewed

@@ -43,6 +43,11 @@
 #include <algorithm>  // std::min
 #include <cstdio>     // std::fprintf
 #include <iostream>
+#include <array>
+#define RB_P(OBJ) \
+	rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \
+	rb_funcall(rb_stderr, rb_intern("puts"), 1, rb_funcall(OBJ, rb_intern("inspect"), 0));
 /*
  * Project Includes
@@ -81,6 +86,9 @@ extern "C" {
   static YALE_STORAGE*  nm_copy_alloc_struct(const YALE_STORAGE* rhs, const nm::dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
   static YALE_STORAGE*	alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::itype_t min_itype);
+  static void* default_value_ptr(const YALE_STORAGE* s);
+  static VALUE default_value(const YALE_STORAGE* s);
   /* Ruby-accessible functions */
   static VALUE nm_size(VALUE self);
   static VALUE nm_a(int argc, VALUE* argv, VALUE self);
@@ -91,7 +99,6 @@ extern "C" {
   static VALUE nm_ija(int argc, VALUE* argv, VALUE self);
   static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);
-  static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self);
 } // end extern "C" block
@@ -107,6 +114,9 @@ static bool						ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r,
 template <typename LDType, typename RDType, typename IType>
 static bool           eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right);
+template <typename LDType, typename RDType, typename IType>
+static bool eqeq_different_defaults(const YALE_STORAGE* s, const LDType& s_init, const YALE_STORAGE* t, const RDType& t_init);
 template <typename IType>
 static YALE_STORAGE*	copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
@@ -127,8 +137,6 @@ static char           vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void
 template <typename DType, typename IType>
 static char           vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only);
-template <typename nm::ewop_t op, typename IType, typename DType>
-YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t dtype);
 /*
  * Functions
@@ -239,7 +247,7 @@ YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, void* r_ia, voi
  * Uses the left as a template for the creation of a new one.
  */
 template <typename DType, typename IType>
-YALE_STORAGE* create_merged(const YALE_STORAGE* left, const YALE_STORAGE* right) {
+YALE_STORAGE* create_merged__(const YALE_STORAGE* left, const YALE_STORAGE* right) {
   char ins_type;
   size_t size = get_size<IType>(left);
@@ -305,7 +313,7 @@ YALE_STORAGE* create_merged(const YALE_STORAGE* left, const YALE_STORAGE* right)
  * Called when most YALE_STORAGE objects are created.
  */
 template <typename DType, typename IType>
-void init(YALE_STORAGE* s) {
+void init(YALE_STORAGE* s, void* init_val) {
   IType IA_INIT = s->shape[0] + 1;
   IType* ija = reinterpret_cast<IType*>(s->ija);
@@ -314,7 +322,7 @@ void init(YALE_STORAGE* s) {
     ija[i] = IA_INIT; // set initial values for IJA
   }
-  clear_diagonal_and_zero<DType>(s);
+  clear_diagonal_and_zero<DType>(s, init_val);
 }
 size_t max_size(YALE_STORAGE* s) {
@@ -376,7 +384,7 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
     rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);
    // Initialize the A and IJA arrays
-  init<DType,IType>(ns);
+  init<DType,IType>(ns, default_value_ptr(storage));
   IType* dst_ija = reinterpret_cast<IType*>(ns->ija);
   DType* dst_a   = reinterpret_cast<DType*>(ns->a);
@@ -516,6 +524,13 @@ char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
  */
 template <typename LDType, typename RDType, typename IType>
 static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) {
+  LDType l_init = reinterpret_cast<LDType*>(left->a )[left->shape[0] ];
+  RDType r_init = reinterpret_cast<RDType*>(right->a)[right->shape[0]];
+  // If the defaults are different between the two matrices, or if slicing is involved, use this other function instead:
+  if (l_init != r_init || left->src != left || right->src != right)
+    return eqeq_different_defaults<LDType,RDType,IType>(left, l_init, right, r_init);
   LDType* la = reinterpret_cast<LDType*>(left->a);
   RDType* ra = reinterpret_cast<RDType*>(right->a);
@@ -555,6 +570,8 @@ static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) {
   return true;
 }
 /*
  * Are two non-diagonal rows the same? We already know.
  */
@@ -573,6 +590,9 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType
   IType ja = std::min(l_ja, r_ja);
+  LDType LZERO = la[l->shape[0]];
+  RDType RZERO = ra[r->shape[0]];
   while (!(l_no_more && r_no_more)) {
     if (l_ja == r_ja) {
@@ -599,7 +619,7 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType
     } else if (l_no_more || ja < l_ja) {
-      if (ra[r_ija] != 0) return false;
+      if (ra[r_ija] != RZERO) return false;
       ++r_ija;
       if (r_ija < r_ija_next) {
@@ -613,7 +633,7 @@ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType
     } else if (r_no_more || ja < r_ja) {
-      if (la[l_ija] != 0) return false;
+      if (la[l_ija] != LZERO) return false;
       ++l_ija;
       if (l_ija < l_ija_next) {
@@ -658,243 +678,6 @@ static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_nex
 #define YALE_IJ(s) (reinterpret_cast<IType*>(s->ija) + s->shape[0] + 1)
 #define YALE_COUNT(yale) (yale->ndnz + yale->shape[0])
-template <typename nm::ewop_t op, typename IType, typename DType>
-YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t dtype) {
-	size_t  init_capacity;
-	size_t* new_shape;
-	unsigned int	da_index,
-								la_index,
-								ra_index,
-								a_index_offset,
-								la_row_max,
-								ra_row_max,
-								row_index;
-	DType tmp_result;
-	DType * la = reinterpret_cast<DType*> (left->a),
-				* ra = reinterpret_cast<DType*>(right->a),
-				* da;
-	YALE_STORAGE* dest;
-	new_shape			= reinterpret_cast<size_t*>(ALLOC_N(size_t, 2));
-	new_shape[0]	= left->shape[0];
-	new_shape[1]	= left->shape[1];
-	init_capacity = std::min(left->ndnz + right->ndnz + new_shape[0], new_shape[0] * new_shape[1]);
-	dest	= nm_yale_storage_create(dtype, new_shape, 2, init_capacity, left->itype);
-	da		= reinterpret_cast<DType*>(dest->a);
-	// Calculate diagonal values.
-	for (da_index = 0; da_index < dest->shape[0]; ++da_index) {
-		da[da_index] = ew_op_switch<op, DType, DType>(la[da_index], ra[da_index]);
-	}
-	// Set the zero representation seperator.
-	da[da_index] = typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0;
-	/*
-	 * Calculate the offset between start of the A arrays and the non-diagonal
-	 * entries.
-	 */
-	a_index_offset = dest->shape[0] + 1;
-	// Re-base the A arrays.
-	la = la + a_index_offset;
-	ra = ra + a_index_offset;
-	da = da + a_index_offset;
-	// Initialize our A array indices.
-	la_index = ra_index = da_index = 0;
-	// Calculate the non-diagonal values.
-	for (row_index = 0; row_index < dest->shape[0]; ++row_index) {
-		/*
-		 * Each row.
-		 */
-		printf("Row %d\n", row_index);
-		// Get row bounds.
-		la_row_max = YALE_IA( left)[row_index + 1] - a_index_offset;
-		ra_row_max = YALE_IA(right)[row_index + 1] - a_index_offset;
-		printf("Left  : Row Start: %d - Row End %d\n", la_index + a_index_offset, la_row_max + a_index_offset);
-		printf("Right : Row Start: %d - Row End %d\n", ra_index + a_index_offset, ra_row_max + a_index_offset);
-		/*
-		 * Set this row's left bound (which is also the previous row's right
-		 * bound).
-		 */
-		YALE_IA(dest)[row_index] = da_index + a_index_offset;
-		printf("Left bound of row %d in destination: %d\n", (int)row_index, (int)YALE_IA(dest)[row_index]);
-		// Iterate over non-diagonal entries in this row.
-		while (la_index < la_row_max and ra_index < ra_row_max) {
-			/*
-			 * Elements are present on both the left- and right-hand side.
-			 */
-			printf("Marker 0\n");
-			if (YALE_IJ(left)[la_index] == YALE_IJ(right)[ra_index]) {
-				/*
-				 * Current left- and right-hand values are in the same row and
-				 * column.
-				 */
-				printf("Calculating value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
-				tmp_result = ew_op_switch<op, DType, DType>(la[la_index], ra[ra_index]);
-				if (tmp_result != 0) {
-					printf("Setting value for [%d, %d] at index %d in destination's A array.\n", (int)row_index, (int)YALE_IJ(left)[la_index], (int)(da_index + a_index_offset));
-					da[da_index]						= tmp_result;
-					YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
-					++da_index;
-				} else {
-					printf("Result was 0.  Skipping.\n");
-				}
-				++la_index;
-				++ra_index;
-			} else if (YALE_IJ(left)[la_index] < YALE_IJ(right)[ra_index]) {
-				/*
-				 * The right-hand index is ahead of the left-hand index.
-				 */
-				if (op != EW_MUL) {
-					// If this is multiplion there is no point in doing the operation.
-					tmp_result = ew_op_switch<op, DType, DType>(la[la_index], typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0);
-					printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
-					if (tmp_result != 0) {
-						da[da_index]						= tmp_result;
-						YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
-						++da_index;
-					}
-				}
-				++la_index;
-			} else {
-				/*
-				 * The left-hand index is ahead of the right-hand index.
-				 */
-				if (op != EW_MUL) {
-					// If this is multiplion there is no point in doing the operation.
-					tmp_result = ew_op_switch<op, DType, DType>(typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0, ra[ra_index]);
-					printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(right)[ra_index]);
-					if (tmp_result != 0) {
-						da[da_index]						= tmp_result;
-						YALE_IJ(dest)[da_index] = YALE_IJ(right)[ra_index];
-						++da_index;
-					}
-				}
-				++ra_index;
-			}
-		}
-		if (op != EW_MUL) {
-			/*
-			 * Process the remaining elements on the left- or right-hand side.  One or
-			 * the other, or neither, of the following loops may execute, but not
-			 * both.
-			 *
-			 * If we are doing multiplication this is unnecessary as all remaining
-			 * operations will produce a zero value.
-			 */
-			while (la_index < la_row_max) {
-				/*
-				 * Process the remaining elements on the left-hand side.
-				 */
-				printf("Marker 1\n");
-				tmp_result = ew_op_switch<op, DType, DType>(la[la_index], typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0);
-				printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
-				if (tmp_result != 0) {
-					da[da_index]						= tmp_result;
-					YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
-					++da_index;
-				}
-				++la_index;
-			}
-			while (ra_index < ra_row_max) {
-				/*
-				 * Process the remaining elements on the right-hand side.
-				 */
-				printf("Marker 2\n");
-				tmp_result = ew_op_switch<op, DType, DType>(typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0, ra[ra_index]);
-				printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(right)[ra_index]);
-				if (tmp_result != 0) {
-					da[da_index]						= tmp_result;
-					YALE_IJ(dest)[da_index] = YALE_IJ(right)[ra_index];
-					++da_index;
-				}
-				++ra_index;
-			}
-		}
-		// Advance the row indices.
-		la_index = la_row_max;
-		ra_index = ra_row_max;
-		printf("End of row %d\n\n", row_index);
-	}
-	// Set the last row's right bound.
-	YALE_IA(dest)[row_index] = da_index + a_index_offset;
-	printf("Right bound of row %d in destination: %d\n", row_index - 1, da_index + a_index_offset);
-	// Set the number of non-diagonal non-zero entries in the destination matrix.
-	dest->ndnz = da_index;
-	printf("Number of non-diagonal non-zero entries: %ld\n\n", (unsigned long)(dest->ndnz));
-	// Set the capacity of the destination matrix.
-	dest->capacity = dest->shape[0] + dest->ndnz + 1;
-	// Resize the destination matrix.
-	dest->a		= realloc(dest->a,   sizeof(DType) * dest->capacity);
-	dest->ija = realloc(dest->ija, sizeof(IType) * dest->capacity);
-	return dest;
-}
 /////////////
 // Utility //
 /////////////
@@ -923,6 +706,36 @@ int binary_search(YALE_STORAGE* s, IType left, IType right, IType key) {
 }
+/*
+ * Enlarge the A and IJA vectors of a Yale matrix. The target capacity is the
+ * current capacity scaled by GROWTH_CONSTANT, limited to max_size(s). The first
+ * s->capacity entries of both vectors are copied into newly allocated buffers,
+ * after which the previous buffers are released and s is pointed at the new ones.
+ */
+static void vector_grow(YALE_STORAGE* s) {
+  const size_t prev_capacity = s->capacity;
+  size_t grown               = prev_capacity * GROWTH_CONSTANT;
+  const size_t limit         = max_size(s);
+  if (limit < grown) grown = limit;
+  void* ija_buf = ALLOC_N(char, ITYPE_SIZES[s->itype] * grown);
+  NM_CHECK_ALLOC(ija_buf);
+  void* a_buf   = ALLOC_N(char, DTYPE_SIZES[s->dtype] * grown);
+  NM_CHECK_ALLOC(a_buf);
+  // Only the entries that existed before the resize are carried over.
+  memcpy(ija_buf, s->ija, prev_capacity * ITYPE_SIZES[s->itype]);
+  memcpy(a_buf,   s->a,   prev_capacity * DTYPE_SIZES[s->dtype]);
+  s->capacity = grown;
+  xfree(s->ija);
+  xfree(s->a);
+  s->ija = ija_buf;
+  s->a   = a_buf;
+}
 /*
  * Resize yale storage vectors A and IJA in preparation for an insertion.
@@ -979,14 +792,12 @@ static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t po
   s->capacity = new_capacity;
-  free(s->ija);
-  free(s->a);
+  xfree(s->ija);
+  xfree(s->a);
   s->ija = reinterpret_cast<void*>(new_ija);
   s->a   = reinterpret_cast<void*>(new_a);
-  fprintf(stderr, "resize\n");
   return 'i';
 }
@@ -1145,6 +956,8 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
   YALE_STORAGE* lhs = ALLOC( YALE_STORAGE );
   lhs->dim          = rhs->dim;
   lhs->shape        = ALLOC_N( size_t, lhs->dim );
+  lhs->offset       = ALLOC_N( size_t, lhs->dim );
+  memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
   memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
   lhs->itype        = rhs->itype;
   lhs->capacity     = new_capacity;
@@ -1153,6 +966,7 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
   lhs->ija          = ALLOC_N( IType, lhs->capacity );
   lhs->a            = ALLOC_N( char, DTYPE_SIZES[new_dtype] * lhs->capacity );
+  lhs->src          = lhs;
   // Now copy the contents -- but only within the boundaries set by the size. Leave
   // the rest uninitialized.
@@ -1176,9 +990,7 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
   IType* ijl;
   if (left->itype == result_itype) ijl = reinterpret_cast<IType*>(left->ija);
   else {  // make a temporary copy of the IJA vector for L with the correct itype
-    std::cerr << "changing left itype from " << static_cast<uint8_t>(left->itype) << " to " << static_cast<int8_t>(result_itype) << std::endl;
     size_t length = nm_yale_storage_get_size(left);
-    std::cerr << "length = " << length << std::endl;
     ijl = ALLOCA_N(IType, length);
     copy_recast_itype_vector(reinterpret_cast<void*>(left->ija), left->itype, reinterpret_cast<void*>(ijl), result_itype, length);
   }
@@ -1186,9 +998,7 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
   IType* ijr;
   if (right->itype == result_itype) ijr = reinterpret_cast<IType*>(right->ija);
   else {  // make a temporary copy of the IJA vector for R with the correct itype
-    std::cerr << "changing right itype from " << static_cast<uint8_t>(right->itype) << " to " << static_cast<int8_t>(result_itype) << std::endl;
     size_t length = nm_yale_storage_get_size(right);
-    std::cerr << "length = " << length << std::endl;
     ijr = ALLOCA_N(IType, length);
     copy_recast_itype_vector(reinterpret_cast<void*>(right->ija), right->itype, reinterpret_cast<void*>(ijr), result_itype, length);
   }
@@ -1200,7 +1010,7 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
   // Create result storage.
   YALE_STORAGE* result = nm_yale_storage_create(left->dtype, resulting_shape, 2, result_ndnz, result_itype);
-  init<DType,IType>(result);
+  init<DType,IType>(result, NULL);
   IType* ija = reinterpret_cast<IType*>(result->ija);
   // Symbolic multiplication step (build the structure)
@@ -1221,28 +1031,395 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
 }
+/*
+ * Accumulate the row and column offsets of x and of every storage on its src
+ * chain, stopping at the storage that is its own src (for sliced iteration).
+ */
+static std::array<size_t,2> get_offsets(YALE_STORAGE* x) {
+  std::array<size_t, 2> total{ {0,0} };
+  for (; x != x->src; x = reinterpret_cast<YALE_STORAGE*>(x->src)) {
+    total[0] += x->offset[0];
+    total[1] += x->offset[1];
+  }
+  return total;
+}
+/*
+ * Fetch entry k of the A vector as a Ruby VALUE. Ruby-object matrices hold
+ * VALUEs directly; every other dtype is converted through rubyobj_from_cval.
+ */
+static VALUE obj_at(YALE_STORAGE* s, size_t k) {
+  if (s->dtype != nm::RUBYOBJ) {
+    char* elem = reinterpret_cast<char*>(s->a) + k * DTYPE_SIZES[s->dtype];
+    return rubyobj_from_cval(reinterpret_cast<void*>(elem), s->dtype).rval;
+  }
+  return reinterpret_cast<VALUE*>(s->a)[k];
+}
+/*
+ * Scoped access to a Yale matrix's IJA vector as an array of IType.
+ *
+ * When the storage's itype already equals temp_itype, ija simply aliases
+ * s->ija and nothing is owned. Otherwise a temporary copy of the vector is
+ * allocated, recast to temp_itype, and released again by the destructor.
+ *
+ * Copying is disabled: a copy would share the temporary buffer and both
+ * destructors would xfree it.
+ */
+template <typename IType>
+class IJAManager {
+protected:
+  bool needs_free;  // true when ija points at a temporary buffer owned by this object
+public:
+  IType* ija;       // the IJA vector to read, either s->ija itself or the recast copy
+  IJAManager(YALE_STORAGE* s, itype_t temp_itype) : needs_free(false), ija(reinterpret_cast<IType*>(s->ija)) {
+    if (s->itype != temp_itype) {
+      size_t len  = nm_yale_storage_get_size(s);
+      needs_free  = true;
+      ija         = ALLOC_N(IType, len);
+      copy_recast_itype_vector(s->ija, s->itype, reinterpret_cast<void*>(ija), temp_itype, len);
+    }
+  }
+  ~IJAManager() {
+    if (needs_free) xfree(ija);
+  }
+  IJAManager(const IJAManager&) = delete;
+  IJAManager& operator=(const IJAManager&) = delete;
+};
+template <typename IType>
+class RowIterator { // Walks the stored entries of row i of a Yale matrix: the diagonal entry a[i] plus the non-diagonal entries at positions k in [ija[i], ija[i+1]).
+protected:
+  YALE_STORAGE* s; // storage being iterated
+  IType* ija;      // IJA array in use (either s->ija or a caller-supplied array)
+  void*  a;        // cached s->a; refreshed by insert() after the vectors grow
+  IType i, k, k_end; // row index; current non-diagonal position; value of ija[i+1] for this row
+  size_t j_offset, j_shape; // column offset subtracted by offset_j(); column count tested by end()
+  bool diag, End; // diag: currently positioned on the diagonal entry; End: operator++ ran out of entries
+  VALUE init; // the matrix's default value, as returned by default_value(s)
+public:
+  RowIterator(YALE_STORAGE* s_, IType* ija_, IType i_, size_t j_shape_, size_t j_offset_ = 0) // iterate row i_ using the caller-supplied IJA array ija_
+    : s(s_),
+      ija(ija_),
+      a(s->a),
+      i(i_),
+      k(ija[i]),
+      k_end(ija[i+1]),
+      j_offset(j_offset_),
+      j_shape(j_shape_),
+      diag(row_has_no_nd() || diag_is_first()), // start on the diagonal if the row has no non-diagonal entries or the diagonal precedes the first one
+      End(false),
+      init(default_value(s))
+    { }
+  RowIterator(YALE_STORAGE* s_, IType i_, size_t j_shape_, size_t j_offset_ = 0) // iterate row i_ reading the IJA array straight from s_->ija
+    : s(s_),
+      ija(reinterpret_cast<IType*>(s->ija)),
+      a(s->a),
+      i(i_),
+      k(ija[i]),
+      k_end(ija[i+1]),
+      j_offset(j_offset_),
+      j_shape(j_shape_),
+      diag(row_has_no_nd() || diag_is_first()),
+      End(false),
+      init(default_value(s))
+  { }
+  RowIterator(const RowIterator& rhs) : s(rhs.s), ija(rhs.ija), a(s->a), i(rhs.i), k(rhs.k), k_end(rhs.k_end), j_offset(rhs.j_offset), j_shape(rhs.j_shape), diag(rhs.diag), End(rhs.End), init(rhs.init) { } // note: a is re-read from s->a rather than copied from rhs.a
+  VALUE obj() const { // value at the current position as a Ruby VALUE
+    return diag ? obj_at(s, i) : obj_at(s, k);
+  }
+  template <typename T>
+  T cobj() const { // value at the current position as T; a RubyObject request goes through obj()
+    if (typeid(T) == typeid(RubyObject)) return obj();
+    return diag ? reinterpret_cast<T*>(s->a)[i] : reinterpret_cast<T*>(s->a)[k];
+  }
+  inline IType proper_j() const { // column index of the current position, before j_offset is subtracted
+    //if (!diag && k >= s->capacity) {
+    //  std::cerr << "proper_j(): Warning: (nondiag) k exceeded capacity at row " << int(i) << ": k=" << int(k) << ", cap=" << s->capacity << std::endl;
+    //  throw;
+    //}
+    return diag ? i : ija[k];
+  }
+  inline IType offset_j() const { // column index of the current position relative to j_offset
+    return proper_j() - j_offset;
+  }
+  /* Returns true if an additional value is inserted, false if it goes on the diagonal */
+  bool insert(IType j, VALUE v) {
+    if (j == i) { // insert regardless on diagonal
+      reinterpret_cast<VALUE*>(a)[j] = v; // NOTE(review): treats a as an array of VALUE -- presumably only called on RUBYOBJ storage; confirm
+      return false;
+    } else {
+      if (rb_funcall(v, rb_intern("!="), 1, init) == Qtrue) { // only values that differ from the default are stored
+        if (k >= s->capacity) { // out of room: grow A/IJA, then refresh the cached pointers
+          vector_grow(s);
+          ija = reinterpret_cast<IType*>(s->ija);
+          a   = s->a;
+        }
+        reinterpret_cast<VALUE*>(a)[k] = v;
+        ija[k] = j;
+        k++;
+        return true;
+      }
+      return false;
+    }
+  }
+  void update_row_end() { // record k as the end of this row's non-diagonal range
+    ija[i+1] = k;
+    k_end    = k;
+  }
+  /* Past the j_shape? */
+  inline bool end() const {
+    if (End)  return true;
+    //if (diag) return i - j_offset >= j_shape;
+    //else return k >= s->capacity || ija[k] - j_offset >= j_shape;
+    return (diag ? i : ija[k]) - j_offset >= j_shape;
+  }
+  inline bool row_has_no_nd() const { return ija[i] == k_end; /* k_start == k_end */  }
+  inline bool diag_is_first() const { return i < ija[ija[i]];  }
+  inline bool diag_is_last() const  { return i > ija[k_end-1]; } // only works if !row_has_no_nd()
+  inline bool k_is_last_nd() const  { return k == k_end-1;     }
+  inline bool k_is_last() const     { return k_is_last_nd() && !diag_is_last(); }
+  inline bool diag_is_ahead() const { return i > ija[k]; }
+  inline bool row_has_diag() const  { return i < s->shape[1];  }
+  inline bool diag_is_next() const  { // assumes we've already tested for diag, row_has_no_nd(), diag_is_first()
+    if (i == ija[k]+1) return true; // definite next
+    else if (k+1 < k_end && i >= ija[k+1]+1) return false; // at least one item before it
+    else return true;
+  }
+  RowIterator<IType>& operator++() { // advance to the next stored entry of the row, setting End when none remain
+    if (diag) {                                             // we're at the diagonal
+      if (row_has_no_nd() || diag_is_last()) End = true;    //  and there are no non-diagonals (or none still to visit)
+      diag = false;
+    } else if (!row_has_diag()) {                           // row has no diagonal entries
+      if (row_has_no_nd() || k_is_last_nd()) End = true;    // row is totally empty, or we're at last entry
+      else k++;                                             // still entries to visit
+//    } else if (row_has_no_nd()) { // in this case we started at diag, so don't check it
+    } else { // not at diag but it exists somewhere in the row, and row has at least one nd entry
+      if (diag_is_ahead()) { // diag is ahead
+        if (k_is_last_nd()) diag = true; // diag is next and last
+        else if (diag_is_next()) {       // diag is next and not last
+          diag = true;
+          k++;
+        } else k++;                      // diag is not next
+      } else {                           // diag is past
+        if (k_is_last_nd()) End = true;  //   and we're at the end
+        else k++;                        //   and we're not at the end
+      }
+    }
+    //if (k >= s->capacity)
+    //  std::cerr << "operator++: Warning: k has exceeded capacity for row " << int(i) << "; k=" << int(k) << ", cap=" << s->capacity << std::endl;
+    return *this;
+  }
+  RowIterator<IType> operator++(int unused) { // postfix form: advances, returning a copy of the iterator as it was beforehand
+    RowIterator<IType> x(*this);
+    ++(*this);
+    return x;
+  }
+};
+/*
+ * Build a new Ruby-object Yale matrix by yielding every stored entry of self
+ * (diagonal and non-diagonal) to the given block. The result's default value
+ * is obtained by yielding self's default value first. Without a block an
+ * enumerator is returned instead.
+ *
+ * The shape array is allocated only after the enumerator check and the first
+ * yield, so it is not leaked when no block is given or when that first yield
+ * raises.
+ *
+ * NOTE(review): r is not wrapped in a Ruby object until the very end, so the
+ * values written into it are presumably invisible to the GC while the block
+ * runs -- confirm whether that needs protection.
+ */
+template <typename IType>
+static VALUE map_stored(VALUE self) {
+  YALE_STORAGE* s = NM_STORAGE_YALE(self);
+  // If we don't have a block, return an enumerator (before anything is allocated).
+  RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_yale_enumerator_length);
+  std::array<size_t,2>  s_offsets = get_offsets(s);
+  VALUE init      = rb_yield(default_value(s));
+  // shape is handed over to nm_yale_storage_create below.
+  size_t* shape   = ALLOC_N(size_t, 2);
+  shape[0]        = s->shape[0];
+  shape[1]        = s->shape[1];
+  YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, s->capacity, NM_ITYPE(self));
+  nm_yale_storage_init(r, &init);
+  for (IType ri = 0; ri < shape[0]; ++ri) {
+    RowIterator<IType> sit(s, ri + s_offsets[0], shape[1], s_offsets[1]);
+    RowIterator<IType> rit(r, ri, shape[1]);
+    while (!sit.end()) {
+      VALUE rv = rb_yield(sit.obj());
+      IType rj = sit.offset_j();  // column index within the result row
+      rit.insert(rj, rv);
+      ++sit;
+    }
+    // Update the row end information.
+    rit.update_row_end();
+  }
+  NMATRIX* m = nm_create(nm::YALE_STORE, reinterpret_cast<STORAGE*>(r));
+  return Data_Wrap_Struct(CLASS_OF(self), nm_yale_storage_mark, nm_delete, m);
+}
+/*
+ * Equality test for two Yale matrices that may be slices and may have
+ * different default values. Each row of s and t is walked in parallel by
+ * column index; whenever only one side has a stored entry at a column, that
+ * entry is compared against the other side's default value.
+ */
+template <typename LDType, typename RDType, typename IType>
+static bool eqeq_different_defaults(const YALE_STORAGE* s, const LDType& s_init, const YALE_STORAGE* t, const RDType& t_init) {
+  YALE_STORAGE* lhs = const_cast<YALE_STORAGE*>(s);
+  YALE_STORAGE* rhs = const_cast<YALE_STORAGE*>(t);
+  const std::array<size_t,2> lhs_off = get_offsets(lhs);
+  const std::array<size_t,2> rhs_off = get_offsets(rhs);
+  for (IType row = 0; row < s->shape[0]; ++row) {
+    RowIterator<IType> lit(lhs, reinterpret_cast<IType*>(s->ija), row + lhs_off[0], s->shape[1], lhs_off[1]);
+    RowIterator<IType> rit(rhs, reinterpret_cast<IType*>(t->ija), row + rhs_off[0], s->shape[1], rhs_off[1]);
+    for (;;) {
+      const bool l_done = lit.end();
+      const bool r_done = rit.end();
+      if (l_done && r_done) break;
+      // Decide which side owns the smallest pending column index.
+      const bool left_only  = r_done || (!l_done && lit.offset_j() < rit.offset_j());
+      const bool right_only = !left_only && (l_done || lit.offset_j() > rit.offset_j());
+      if (left_only) {
+        // Only the left matrix stores this column: compare against the right default.
+        if (lit.template cobj<LDType>() != t_init) return false;
+        ++lit;
+      } else if (right_only) {
+        // Only the right matrix stores this column: compare against the left default.
+        if (s_init != rit.template cobj<RDType>()) return false;
+        ++rit;
+      } else {
+        // Both store this column.
+        if (lit.template cobj<LDType>() != rit.template cobj<RDType>()) return false;
+        ++lit;
+        ++rit;
+      }
+    }
+  }
+  return true;
+}
+/*
+ * Element-wise merge of two Yale matrices into a new Ruby-object Yale matrix.
+ * For every column where either operand has a stored entry, the block is
+ * yielded the pair (left value, right value), with the missing side replaced
+ * by that operand's default value. If init is nil, the result's default value
+ * is obtained by yielding the two operands' defaults. Without a block an
+ * enumerator is returned instead.
+ *
+ * The shape array is allocated only after the enumerator check and the default
+ * value computation, so it is not leaked when no block is given or when that
+ * first yield raises.
+ */
+template <typename IType>
+static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t itype) {
+  YALE_STORAGE *s = NM_STORAGE_YALE(left),
+               *t = NM_STORAGE_YALE(right);
+  // If we don't have a block, return an enumerator (before anything is allocated).
+  RETURN_SIZED_ENUMERATOR(left, 0, 0, 0);
+  std::array<size_t,2>  s_offsets = get_offsets(s),
+                        t_offsets = get_offsets(t);
+  VALUE s_init    = default_value(s),
+        t_init    = default_value(t);
+  if (init == Qnil)
+    init          = rb_yield_values(2, s_init, t_init);
+  // shape is handed over to nm_yale_storage_create below.
+  size_t* shape   = ALLOC_N(size_t, 2);
+  shape[0]        = s->shape[0];
+  shape[1]        = s->shape[1];
+  YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, NM_MAX(s->capacity, t->capacity), itype);
+  nm_yale_storage_init(r, &init);
+  // Temporary IJA views of both operands in the result's itype.
+  IJAManager<IType> sm(s, itype),
+                    tm(t, itype);
+  for (IType ri = 0; ri < shape[0]; ++ri) {
+    RowIterator<IType> sit(s, sm.ija, ri + s_offsets[0], shape[1], s_offsets[1]);
+    RowIterator<IType> tit(t, tm.ija, ri + t_offsets[0], shape[1], t_offsets[1]);
+    RowIterator<IType> rit(r, reinterpret_cast<IType*>(r->ija), ri, shape[1]);
+    while (!rit.end() && (!sit.end() || !tit.end())) {
+      VALUE rv;
+      IType rj;
+      // Perform the computation. Use a default value if the matrix doesn't have some value stored.
+      if (tit.end() || (!sit.end() && sit.offset_j() < tit.offset_j())) {
+        rv = rb_yield_values(2, sit.obj(), t_init);
+        rj = sit.offset_j();
+        ++sit;
+      } else if (sit.end() || (!tit.end() && sit.offset_j() > tit.offset_j())) {
+        rv = rb_yield_values(2, s_init, tit.obj());
+        rj = tit.offset_j();
+        ++tit;
+      } else {  // same index
+        rv = rb_yield_values(2, sit.obj(), tit.obj());
+        rj = sit.offset_j();
+        ++sit;
+        ++tit;
+      }
+      rit.insert(rj, rv); // handles increment (and testing for default, etc)
+    }
+    // Update the row end information.
+    rit.update_row_end();
+  }
+  NMATRIX* m = nm_create(nm::YALE_STORE, reinterpret_cast<STORAGE*>(r));
+  return Data_Wrap_Struct(CLASS_OF(left), nm_yale_storage_mark, nm_delete, m);
+}
 } // end of namespace nm::yale_storage
 // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
 // the matrix's storage.
-static VALUE nm_yale_enumerator_length(VALUE nmatrix) {
+static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
   long len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix));
   return LONG2NUM(len);
 }
 template <typename DType, typename IType>
-struct yale_each_stored_with_indices_helper {
-  static VALUE iterate(VALUE nm) {
+struct yale_iteration_helper {
+  static VALUE iterate_with_indices(VALUE nm) {
     YALE_STORAGE* s = NM_STORAGE_YALE(nm);
-    DType* a    = reinterpret_cast<DType*>(s->a);
-    IType* ija  = reinterpret_cast<IType*>(s->ija);
+    DType* a        = reinterpret_cast<DType*>(s->a);
+    IType* ija      = reinterpret_cast<IType*>(s->ija);
     // If we don't have a block, return an enumerator.
     RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
+    // Iterate in two dimensions.
+    for (long i = 0; i < s->shape[0]; ++i) {
+      VALUE ii = LONG2NUM(i);
+      IType k = ija[i], k_next = ija[i+1];
+      for (long j = 0; j < s->shape[1]; ++j) {
+        VALUE v, jj = LONG2NUM(j);
+        // zero is stored in s->shape[0]
+        if (i == j) {
+          v = rubyobj_from_cval(&(a[i]), NM_DTYPE(nm)).rval;
+        } else {
+          // Walk through the row until we find the correct location.
+          while (ija[k] < j && k < k_next) ++k;
+          if (k < k_next && ija[k] == j) {
+            v = rubyobj_from_cval(&(a[k]), NM_DTYPE(nm)).rval;
+            ++k;
+          } else v = rubyobj_from_cval(&(a[s->shape[0]]), NM_DTYPE(nm)).rval;
+        }
+        rb_yield_values(3, v, ii, jj);
+      }
+    }
+    return nm;
+  }
+  static VALUE iterate_stored_with_indices(VALUE nm) {
+    YALE_STORAGE* s = NM_STORAGE_YALE(nm);
+    DType* a        = reinterpret_cast<DType*>(s->a);
+    IType* ija      = reinterpret_cast<IType*>(s->ija);
+    // If we don't have a block, return an enumerator.
+    RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
     // Iterate along diagonal
     for (size_t k = 0; k < s->shape[0]; ++k) {
       VALUE ii = LONG2NUM(k),
@@ -1263,7 +1440,7 @@ struct yale_each_stored_with_indices_helper {
               jj = LONG2NUM(j);
         VALUE v = rubyobj_from_cval(&(a[p]), NM_DTYPE(nm)).rval;
-        rb_yield_values(3, v, ii, jj );
+        rb_yield_values(3, v, ii, jj);
       }
     }
@@ -1273,9 +1450,8 @@ struct yale_each_stored_with_indices_helper {
 template <typename IType>
-struct yale_each_stored_with_indices_helper<RubyObject, IType> {
-  static VALUE iterate(VALUE nm) {
+struct yale_iteration_helper<RubyObject, IType> {
+  static VALUE iterate_with_indices(VALUE nm) {
     YALE_STORAGE* s = NM_STORAGE_YALE(nm);
     RubyObject* a   = reinterpret_cast<RubyObject*>(s->a);
     IType* ija      = reinterpret_cast<IType*>(s->ija);
@@ -1283,6 +1459,42 @@ struct yale_each_stored_with_indices_helper<RubyObject, IType> {
     // If we don't have a block, return an enumerator.
     RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
+    // Iterate in two dimensions.
+    for (long i = 0; i < s->shape[0]; ++i) {
+      VALUE ii = LONG2NUM(i);
+      IType k = ija[i], k_next = ija[i+1];
+      for (long j = 0; j < s->shape[1]; ++j) {
+        VALUE v, jj = LONG2NUM(j);
+        // zero is stored in s->shape[0]
+        if (i == j) {
+          v = a[i].rval;
+        } else {
+          // Walk through the row until we find the correct location.
+          while (ija[k] < j && k < k_next) ++k;
+          if (k < k_next && ija[k] == j) {
+            v = a[k].rval;
+            ++k;
+          } else v = a[s->shape[0]].rval;
+        }
+        rb_yield_values(3, v, ii, jj);
+      }
+    }
+    return nm;
+  }
+  static VALUE iterate_stored_with_indices(VALUE nm) {
+    YALE_STORAGE* s = NM_STORAGE_YALE(nm);
+    RubyObject* a   = reinterpret_cast<RubyObject*>(s->a);
+    IType* ija      = reinterpret_cast<IType*>(s->ija);
+    // If we don't have a block, return an enumerator.
+    RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
     // Iterate along diagonal
     for (size_t k = 0; k < s->shape[0]; ++k) {
       VALUE ii = LONG2NUM(k),
@@ -1292,8 +1504,8 @@ struct yale_each_stored_with_indices_helper<RubyObject, IType> {
     // Iterate through non-diagonal elements, row by row
     for (long i = 0; i < s->shape[0]; ++i) {
-      long p      = static_cast<long>( ija[i]   ),
-           next_p = static_cast<long>( ija[i+1] );
+      IType p      = ija[i],
+            next_p = ija[i+1];
       for (; p < next_p; ++p) {
         long j = static_cast<long>(ija[p]);
@@ -1315,7 +1527,12 @@ struct yale_each_stored_with_indices_helper<RubyObject, IType> {
  */
 template <typename DType, typename IType>
 static VALUE yale_each_stored_with_indices(VALUE nm) {
-  return yale_each_stored_with_indices_helper<DType, IType>::iterate(nm);
+  return yale_iteration_helper<DType, IType>::iterate_stored_with_indices(nm);
+}
+/* Forwards to the iterate_with_indices routine of the helper selected by DType and IType. */
+template <typename DType, typename IType>
+static VALUE yale_each_with_indices(VALUE nm) {
+  typedef yale_iteration_helper<DType, IType> helper_t;
+  return helper_t::iterate_with_indices(nm);
 }
@@ -1345,7 +1562,6 @@ void nm_init_yale_functions() {
   rb_define_method(cNMatrix_YaleFunctions, "yale_lu", (METHOD)nm_lu, 0);
   rb_define_method(cNMatrix_YaleFunctions, "yale_nd_row", (METHOD)nm_nd_row, -1);
-  rb_define_method(cNMatrix_YaleFunctions, "yale_vector_insert", (METHOD)nm_vector_insert, -1);
   rb_define_const(cNMatrix_YaleFunctions, "YALE_GROWTH_CONSTANT", rb_float_new(nm::yale_storage::GROWTH_CONSTANT));
 }
@@ -1356,7 +1572,18 @@ void nm_init_yale_functions() {
 /////////////////
+/*
+ * C interface for NMatrix#each_with_indices (Yale). Picks the template
+ * instantiation matching the matrix's dtype and itype and calls it.
+ */
+VALUE nm_yale_each_with_indices(VALUE nmatrix) {
+  const nm::dtype_t dt = NM_DTYPE(nmatrix);
+  const nm::itype_t it = NM_ITYPE(nmatrix);
+  NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_each_with_indices, VALUE, VALUE)
+  return ttable[dt][it](nmatrix);
+}
+/* C interface for NMatrix#each_stored_with_indices (Yale) */
 VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) {
   nm::dtype_t d = NM_DTYPE(nmatrix);
   nm::itype_t i = NM_ITYPE(nmatrix);
@@ -1367,6 +1594,7 @@ VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) {
 }
 /*
  * C accessor for inserting some value in a matrix (or replacing an existing cell).
  */
@@ -1422,10 +1650,9 @@ void* nm_yale_storage_ref(STORAGE* storage, SLICE* slice) {
   return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
 }
 /*
  * C accessor for determining whether two YALE_STORAGE objects have the same contents.
- *
- * FIXME: Is this for element-wise or whole-matrix equality?
  */
 bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right) {
   NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::eqeq, bool, const YALE_STORAGE* left, const YALE_STORAGE* right);
@@ -1435,10 +1662,11 @@ bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right) {
   return ttable[casted_left->dtype][right->dtype][casted_left->itype](casted_left, (const YALE_STORAGE*)right);
 }
 /*
  * Copy constructor for changing dtypes. (C accessor)
  */
-STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
+STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
   NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::cast_copy, YALE_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t new_dtype);
   const YALE_STORAGE* casted_rhs = reinterpret_cast<const YALE_STORAGE*>(rhs);
@@ -1446,6 +1674,7 @@ STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype) {
   return (STORAGE*)ttable[new_dtype][casted_rhs->dtype][casted_rhs->itype](casted_rhs, new_dtype);
 }
 /*
  * Returns size of Yale storage as a size_t (no matter what the itype is). (C accessor)
  */
@@ -1455,6 +1684,32 @@ size_t nm_yale_storage_get_size(const YALE_STORAGE* storage) {
   return ttable[storage->itype](storage);
 }
+/*
+ * Locate the matrix's default value entry: element shape[0] of the A vector.
+ * Returned as an untyped pointer because the element width depends on dtype.
+ */
+static void* default_value_ptr(const YALE_STORAGE* s) {
+  char* base            = reinterpret_cast<char*>(s->a);
+  const size_t byte_pos = s->shape[0] * DTYPE_SIZES[s->dtype];
+  return reinterpret_cast<void*>(base + byte_pos);
+}
+/*
+ * Fetch the matrix's default value as a Ruby VALUE. Ruby-object matrices store
+ * the VALUE itself; other dtypes are converted through rubyobj_from_cval.
+ */
+static VALUE default_value(const YALE_STORAGE* s) {
+  void* slot = default_value_ptr(s);
+  if (s->dtype != nm::RUBYOBJ) return rubyobj_from_cval(slot, s->dtype).rval;
+  return *reinterpret_cast<VALUE*>(slot);
+}
+/*
+ * Report whether the matrix's default value compares equal (Ruby ==) to the
+ * integer 0. Easy for non-Ruby object matrices, which should always be 0.
+ */
+static bool default_value_is_numeric_zero(const YALE_STORAGE* s) {
+  const VALUE equals_zero = rb_funcall(default_value(s), rb_intern("=="), 1, INT2FIX(0));
+  return equals_zero == Qtrue;
+}
 /*
  * C accessor for allocating a yale storage object for cast-copying. Copies the IJA vector, does not copy the A vector.
  */
@@ -1476,8 +1731,8 @@ STORAGE* nm_yale_storage_copy_transposed(const STORAGE* rhs_base) {
   size_t size   = nm_yale_storage_get_size(rhs);
-  YALE_STORAGE* lhs = nm_yale_storage_create(rhs->dtype, shape, 2, size, nm::UINT8);
-  nm_yale_storage_init(lhs);
+  YALE_STORAGE* lhs = nm_yale_storage_create(rhs->dtype, shape, 2, size, rhs->itype);
+  nm_yale_storage_init(lhs, default_value_ptr(rhs));
   NAMED_LI_DTYPE_TEMPLATE_TABLE(transp, nm::math::transpose_yale, void, const size_t n, const size_t m, const void* ia_, const void* ja_, const void* a_, const bool diaga, void* ib_, void* jb_, void* b_, const bool move);
@@ -1498,6 +1753,11 @@ STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, siz
   YALE_STORAGE* left = reinterpret_cast<YALE_STORAGE*>(casted_storage.left);
   YALE_STORAGE* right = reinterpret_cast<YALE_STORAGE*>(casted_storage.right);
+  if (!default_value_is_numeric_zero(left) || !default_value_is_numeric_zero(right)) {
+    rb_raise(rb_eNotImpError, "matrix default value must be some form of zero (not false or nil) for multiplication");
+    return NULL;
+  }
   // Determine the itype for the matrix that will be returned.
   nm::itype_t itype = nm_yale_storage_itype_by_shape(resulting_shape),
               max_itype = NM_MAX_ITYPE(left->itype, right->itype);
@@ -1506,70 +1766,6 @@ STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, siz
   return ttable[left->dtype][itype](casted_storage, resulting_shape, vector, itype);
 }
-/*
- * Documentation goes here.
- */
-STORAGE* nm_yale_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right, VALUE scalar) {
-	OP_ITYPE_DTYPE_TEMPLATE_TABLE(nm::yale_storage::ew_op, YALE_STORAGE*, const YALE_STORAGE*, const YALE_STORAGE*, nm::dtype_t);
-	YALE_STORAGE* new_l = NULL, * new_r = NULL;
-	YALE_STORAGE* result;
-	const YALE_STORAGE* casted_l, * casted_r;
-	nm::dtype_t new_dtype;
-	if (left->dtype != right->dtype) {
-		new_dtype = Upcast[left->dtype][right->dtype];
-		if (left->dtype != new_dtype) {
-			new_l = reinterpret_cast<YALE_STORAGE*>(nm_yale_storage_cast_copy( left, new_dtype));
-		}
-		if (right->dtype != new_dtype) {
-			new_r = reinterpret_cast<YALE_STORAGE*>(nm_yale_storage_cast_copy(right, new_dtype));
-		}
-		if (static_cast<uint8_t>(op) < nm::NUM_NONCOMP_EWOPS) {
-			result = ttable[op][new_l->itype][new_dtype](	left->dtype  == new_dtype ?
-																											reinterpret_cast<const YALE_STORAGE*>( left) :
-																											reinterpret_cast<const YALE_STORAGE*>(new_l),
-																										right->dtype == new_dtype ?
-																											reinterpret_cast<const YALE_STORAGE*>(right) :
-																											reinterpret_cast<const YALE_STORAGE*>(new_r),
-																										new_dtype);
-		} else {
-			rb_raise(rb_eNotImpError, "Elementwise comparison is not yet implemented for the Yale storage class.");
-		}
-		if (new_l != NULL) {
-			nm_yale_storage_delete(new_l);
-		}
-		if (new_r != NULL) {
-			nm_yale_storage_delete(new_r);
-		}
-		return result;
-	} else {
-		casted_l = reinterpret_cast<const YALE_STORAGE*>( left);
-		casted_r = reinterpret_cast<const YALE_STORAGE*>(right);
-		if (static_cast<uint8_t>(op) < nm::NUM_NONCOMP_EWOPS) {
-			return ttable[op][casted_l->itype][casted_l->dtype](casted_l, casted_r, casted_l->dtype);
-		} else {
-			rb_raise(rb_eNotImpError, "Elementwise comparison is not yet implemented for the Yale storage class.");
-		}
-	}
-}
 ///////////////
 // Lifecycle //
@@ -1620,10 +1816,11 @@ YALE_STORAGE* nm_yale_storage_create(nm::dtype_t dtype, size_t* shape, size_t di
 void nm_yale_storage_delete(STORAGE* s) {
   if (s) {
     YALE_STORAGE* storage = (YALE_STORAGE*)s;
-    free(storage->shape);
-    free(storage->ija);
-    free(storage->a);
-    free(storage);
+    xfree(storage->shape);
+    xfree(storage->offset);
+    xfree(storage->ija);
+    xfree(storage->a);
+    xfree(storage);
   }
 }
@@ -1632,10 +1829,10 @@ void nm_yale_storage_delete(STORAGE* s) {
  *
  * Initializes the IJA vector of the YALE_STORAGE matrix.
  */
-void nm_yale_storage_init(YALE_STORAGE* s) {
-  NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::init, void, YALE_STORAGE* s);
+void nm_yale_storage_init(YALE_STORAGE* s, void* init_val) {
+  NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::init, void, YALE_STORAGE*, void*);
-  ttable[s->dtype][s->itype](s);
+  ttable[s->dtype][s->itype](s, init_val);
 }
@@ -1664,8 +1861,12 @@ static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::ity
   s->ndnz        = 0;
   s->dtype       = dtype;
   s->shape       = shape;
+  s->offset      = ALLOC_N(size_t, dim);
+  for (size_t i = 0; i < dim; ++i)
+    s->offset[i] = 0;
   s->dim         = dim;
   s->itype       = nm_yale_storage_itype_by_shape(shape);
+  s->src         = reinterpret_cast<STORAGE*>(s);
   // See if a higher itype has been requested.
   if (static_cast<int8_t>(s->itype) < static_cast<int8_t>(min_itype))
@@ -1723,8 +1924,14 @@ static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
   if (idx == Qnil) {
     VALUE* vals = ALLOCA_N(VALUE, size);
-    for (size_t i = 0; i < size; ++i) {
-      vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+    if (NM_DTYPE(self) == nm::RUBYOBJ) {
+      for (size_t i = 0; i < size; ++i) {
+        vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
+      }
+    } else {
+      for (size_t i = 0; i < size; ++i) {
+        vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+      }
     }
     VALUE ary = rb_ary_new4(size, vals);
@@ -1757,9 +1964,16 @@ static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
   if (idx == Qnil) {
     VALUE* vals = ALLOCA_N(VALUE, s->shape[0]);
-    for (size_t i = 0; i < s->shape[0]; ++i) {
-      vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+    if (NM_DTYPE(self) == nm::RUBYOBJ) {
+      for (size_t i = 0; i < s->shape[0]; ++i) {
+        vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
+      }
+    } else {
+      for (size_t i = 0; i < s->shape[0]; ++i) {
+        vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+      }
     }
     return rb_ary_new4(s->shape[0], vals);
   } else {
     size_t index = FIX2INT(idx);
@@ -1782,8 +1996,14 @@ static VALUE nm_lu(VALUE self) {
   VALUE* vals = ALLOCA_N(VALUE, size - s->shape[0] - 1);
-  for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
-    vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*(s->shape[0] + 1 + i), s->dtype).rval;
+  if (NM_DTYPE(self) == nm::RUBYOBJ) {
+    for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
+      vals[i] = reinterpret_cast<VALUE*>(s->a)[s->shape[0] + 1 + i];
+    }
+  } else {
+    for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
+      vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*(s->shape[0] + 1 + i), s->dtype).rval;
+    }
   }
   VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
@@ -1882,20 +2102,18 @@ static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
  *     yale_nd_row -> ...
  *
  * This function gets the non-diagonal contents of a Yale matrix row.
- * The first argument should be the row index. The optional second argument may be :hash or :array, but defaults
- * to :hash. If :array is given, it will only return the Hash keys (the column indices).
+ * The first argument should be the row index. The optional second argument may be :hash or :keys, but defaults
+ * to :hash. If :keys is given, it will only return the Hash keys (the column indices).
  *
  * This function is meant to accomplish its purpose as efficiently as possible. It does not check for appropriate
  * range.
- *
- * FIXME: :array doesn't make sense. This should be :keys or :values to indicate which array we want.
  */
 static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
   VALUE i_, as;
   rb_scan_args(argc, argv, "11", &i_, &as);
-  bool array = false;
-  if (as != Qnil && rb_to_id(as) != nm_rb_hash) array = true;
+  bool keys = false;
+  if (as != Qnil && rb_to_id(as) != nm_rb_hash) keys = true;
   size_t i = FIX2INT(i_);
@@ -1912,7 +2130,7 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
   //std::cerr << "diff = " << diff << "\tpos = " << pos << "\tnextpos = " << nextpos << std::endl;
   VALUE ret; // HERE
-  if (array) {
+  if (keys) {
     ret = rb_ary_new3(diff);
     for (size_t idx = pos; idx < nextpos; ++idx) {
@@ -1933,7 +2151,7 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
 /*
  * call-seq:
- *     yale_vector_insert -> Fixnum
+ *     yale_vector_set(i, column_index_array, cell_contents_array, pos) -> Fixnum
  *
  * Insert at position pos an array of non-diagonal elements with column indices given. Note that the column indices and values
  * must be storage-contiguous -- that is, you can't insert them around existing elements in some row, only amid some
@@ -1949,18 +2167,18 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
  * lead to undefined behavior.
  *
  * Example:
- *    m.yale_vector_insert(3, [0,3,4], [1,1,1], 15)
+ *    m.yale_vector_set(3, [0,3,4], [1,1,1], 15)
  *
  * The example above inserts the values 1, 1, and 1 in columns 0, 3, and 4, assumed to be located at position 15 (which
  * corresponds to row 3).
  *
  * Example:
- *    next = m.yale_vector_insert(3, [0,3,4], [1,1,1])
+ *    next = m.yale_vector_set(3, [0,3,4], [1,1,1])
  *
  * This example determines that i=3 is at position 15 automatically. The value returned, next, is the position where the
  * next value(s) should be inserted.
  */
-static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) {
+VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) {
   // i, jv, vv are mandatory; pos is optional; thus "31"
   VALUE i_, jv, vv, pos_;
@@ -2002,4 +2220,46 @@ static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self) { //, VALUE i_,
 }
+/*
+ * call-seq:
+ *     __yale_default_value__ -> ...
+ *
+ * Get the default_value property from a yale matrix.
+ *
+ * The receiver's storage is obtained with NM_STORAGE_YALE and handed to default_value(); whatever that
+ * helper returns is passed straight back to the caller.
+ *
+ * NOTE(review): no storage-type check is visible in this function -- presumably callers must only invoke
+ * it on Yale-stored matrices; confirm against NM_STORAGE_YALE.
+ */
+VALUE nm_yale_default_value(VALUE self) {
+  return default_value(NM_STORAGE_YALE(self));
+}
+/*
+ * call-seq:
+ *     __yale_map_merged_stored__(right) -> Enumerator
+ *
+ * A map operation on two Yale matrices which only iterates across the stored indices.
+ *
+ * Both operands are read as YALE_STORAGE. The itype used for dispatch is whatever NM_MAX_ITYPE selects from
+ * the two operands' itypes; the call is then routed through the itype template table to
+ * nm::yale_storage::map_merged_stored, which receives left, right, init and the chosen itype.
+ *
+ * NOTE(review): the C function also takes an init argument that the call-seq above does not list --
+ * confirm the arity this method is registered with and update the call-seq accordingly.
+ */
+VALUE nm_yale_map_merged_stored(VALUE left, VALUE right, VALUE init) {
+  YALE_STORAGE *s = NM_STORAGE_YALE(left),
+               *t = NM_STORAGE_YALE(right);
+  ITYPE_TEMPLATE_TABLE(nm::yale_storage::map_merged_stored, VALUE, VALUE l, VALUE r, VALUE init, nm::itype_t)
+  nm::itype_t itype = NM_MAX_ITYPE(s->itype, t->itype);
+  return ttable[itype](left, right, init, itype);
+}
+/*
+ * call-seq:
+ *     __yale_map_stored__ -> Enumerator
+ *
+ * A map operation on a single Yale matrix (the receiver) which only iterates across the stored indices.
+ *
+ * Dispatches through the itype template table, indexed by NM_ITYPE(self), to
+ * nm::yale_storage::map_stored and returns that function's result unchanged.
+ */
+VALUE nm_yale_map_stored(VALUE self) {
+  ITYPE_TEMPLATE_TABLE(nm::yale_storage::map_stored, VALUE, VALUE)
+  return ttable[NM_ITYPE(self)](self);
+}
 } // end of extern "C" block