RubyGems - nmatrix - Versions diffs - 0.0.4 → 0.0.5 - Mend

nmatrix 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

checksums.yaml +7 -0
data/History.txt +68 -2
data/Manifest.txt +1 -0
data/README.rdoc +8 -7
data/Rakefile +13 -2
data/ext/nmatrix/data/complex.h +19 -1
data/ext/nmatrix/data/data.h +8 -0
data/ext/nmatrix/data/ruby_object.h +1 -0
data/ext/nmatrix/extconf.rb +6 -4
data/ext/nmatrix/nmatrix.cpp +97 -35
data/ext/nmatrix/nmatrix.h +2 -0
data/ext/nmatrix/ruby_constants.cpp +11 -1
data/ext/nmatrix/ruby_constants.h +6 -1
data/ext/nmatrix/storage/dense.cpp +2 -2
data/ext/nmatrix/storage/yale.cpp +303 -49
data/ext/nmatrix/storage/yale.h +3 -0
data/ext/nmatrix/util/math.cpp +112 -0
data/ext/nmatrix/util/math.h +372 -72
data/lib/nmatrix/blas.rb +55 -9
data/lib/nmatrix/nmatrix.rb +315 -2
data/lib/nmatrix/nvector.rb +156 -95
data/lib/nmatrix/version.rb +1 -1
data/lib/nmatrix/yale_functions.rb +112 -0
data/spec/blas_spec.rb +11 -0
data/spec/elementwise_spec.rb +4 -1
data/spec/io_spec.rb +8 -0
data/spec/lapack_spec.rb +37 -15
data/spec/leakcheck.rb +16 -0
data/spec/math_spec.rb +6 -2
data/spec/nmatrix_spec.rb +209 -3
data/spec/nmatrix_yale_spec.rb +55 -0
data/spec/nvector_spec.rb +33 -14
data/spec/slice_spec.rb +26 -17
data/spec/spec_helper.rb +17 -0
metadata +60 -45
data/ext/nmatrix/new_extconf.rb +0 -55

data/ext/nmatrix/nmatrix.h CHANGED Viewed

@@ -323,6 +323,8 @@ NM_DEF_STRUCT_POST(NMATRIX);  // };
 #define NM_CHECK_ALLOC(x) if (!x) rb_raise(rb_eNoMemError, "insufficient memory");
+#define RB_FILE_EXISTS(fn)   (rb_funcall(rb_const_get(rb_cObject, rb_intern("File")), rb_intern("exists?"), 1, (fn)) == Qtrue)
 #define CheckNMatrixType(v)   if (TYPE(v) != T_DATA || (RDATA(v)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(v)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation");
 #define NM_IsNMatrix(obj) \

data/ext/nmatrix/ruby_constants.cpp CHANGED Viewed

@@ -55,6 +55,9 @@ ID	nm_rb_real,
 		nm_rb_list,
 		nm_rb_yale,
+		nm_rb_row,
+		nm_rb_column,
 		nm_rb_add,
 		nm_rb_sub,
 		nm_rb_mul,
@@ -68,7 +71,9 @@ ID	nm_rb_real,
 		nm_rb_eql,
 		nm_rb_neql,
 		nm_rb_gte,
-		nm_rb_lte;
+		nm_rb_lte,
+		nm_rb_hash;
 VALUE cNMatrix,
       cNMatrix_IO,
@@ -122,4 +127,9 @@ void nm_init_ruby_constants(void) {
 	nm_rb_lower             = rb_intern("lower");
 	nm_rb_unit              = rb_intern("unit");
 	nm_rb_nonunit           = rb_intern("nonunit");
+	nm_rb_hash              = rb_intern("hash");
+	nm_rb_column            = rb_intern("column");
+	nm_rb_row               = rb_intern("row");
 }

data/ext/nmatrix/ruby_constants.h CHANGED Viewed

@@ -57,6 +57,9 @@ extern ID nm_rb_real,
 					nm_rb_dense,
 					nm_rb_list,
 					nm_rb_yale,
+          nm_rb_row,
+          nm_rb_column,
 					nm_rb_add,
 					nm_rb_sub,
@@ -71,7 +74,9 @@ extern ID nm_rb_real,
 					nm_rb_eql,
 					nm_rb_neql,
 					nm_rb_gte,
-					nm_rb_lte;
+					nm_rb_lte,
+					nm_rb_hash;
 extern VALUE	cNMatrix,
               cNMatrix_IO,

data/ext/nmatrix/storage/dense.cpp CHANGED Viewed

@@ -253,7 +253,7 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
   nm_dense_storage_delete(sliced_dummy);
-  return Qnil;
+  return nmatrix;
 }
@@ -299,7 +299,7 @@ VALUE nm_dense_each(VALUE nmatrix) {
   nm_dense_storage_delete(sliced_dummy);
-  return Qnil;
+  return nmatrix;
 }

data/ext/nmatrix/storage/yale.cpp CHANGED Viewed

@@ -68,6 +68,11 @@
 #define NM_MIN(a,b) (((a)<(b))?(a):(b))
 #endif
+#ifndef NM_MAX_ITYPE
+#define NM_MAX_ITYPE(a,b) ((static_cast<int8_t>(a) > static_cast<int8_t>(b)) ? static_cast<nm::itype_t>(a) : static_cast<nm::itype_t>(b))
+#define NM_MIN_ITYPE(a,b) ((static_cast<int8_t>(a) < static_cast<int8_t>(b)) ? static_cast<nm::itype_t>(a) : static_cast<nm::itype_t>(b))
+#endif
 /*
  * Forward Declarations
  */
@@ -78,12 +83,16 @@ extern "C" {
   /* Ruby-accessible functions */
   static VALUE nm_size(VALUE self);
-  static VALUE nm_a(VALUE self);
-  static VALUE nm_d(VALUE self);
+  static VALUE nm_a(int argc, VALUE* argv, VALUE self);
+  static VALUE nm_d(int argc, VALUE* argv, VALUE self);
   static VALUE nm_lu(VALUE self);
   static VALUE nm_ia(VALUE self);
   static VALUE nm_ja(VALUE self);
-  static VALUE nm_ija(VALUE self);
+  static VALUE nm_ija(int argc, VALUE* argv, VALUE self);
+  static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);
+  static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self);
 } // end extern "C" block
@@ -104,11 +113,16 @@ static YALE_STORAGE*	copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
 template <typename IType>
 static void						increment_ia_after(YALE_STORAGE* s, IType ija_size, IType i, IType n);
+template <typename IType>
+static void           c_increment_ia_after(YALE_STORAGE* s, size_t ija_size, size_t i, size_t n) {
+  increment_ia_after<IType>(s, ija_size, i, n);
+}
 template <typename IType>
 static IType				  insert_search(YALE_STORAGE* s, IType left, IType right, IType key, bool* found);
 template <typename DType, typename IType>
-static char           vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, DType* val, size_t n, bool struct_only);
+static char           vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void* val_, size_t n, bool struct_only);
 template <typename DType, typename IType>
 static char           vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only);
@@ -120,6 +134,27 @@ YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t
  * Functions
  */
+/*
+ * Copy a vector from one IType or DType to another.
+ */
+template <typename LType, typename RType>
+static inline void copy_recast_vector(const void* in_, void* out_, size_t length) {
+  const RType* in = reinterpret_cast<const RType*>(in_);
+  LType* out      = reinterpret_cast<LType*>(out_);
+  for (size_t i = 0; i < length; ++i) {
+    out[i] = in[i];
+  }
+  out;
+}
+static inline void copy_recast_itype_vector(const void* in, nm::itype_t in_itype, void* out, nm::itype_t out_itype, size_t length) {
+  NAMED_LR_ITYPE_TEMPLATE_TABLE(ttable, copy_recast_vector, void, const void* in_, void* out_, size_t length);
+  ttable[out_itype][in_itype](in, out, length);
+}
 /*
  * Create Yale storage from IA, JA, and A vectors given in Old Yale format (probably from a file, since NMatrix only uses
  * new Yale for its storage).
@@ -647,7 +682,7 @@ YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t
 	YALE_STORAGE* dest;
-	new_shape			= reinterpret_cast<size_t*>(calloc(2, sizeof(size_t)));
+	new_shape			= reinterpret_cast<size_t*>(ALLOC_N(size_t, 2));
 	new_shape[0]	= left->shape[0];
 	new_shape[1]	= left->shape[1];
@@ -932,11 +967,11 @@ static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t po
   // Copy all values subsequent to the insertion site to the new IJA and new A, leaving room (size n) for insertion.
   if (struct_only) {
-    for (size_t i = pos; i < current_size - pos + n - 1; ++i) {
+    for (size_t i = pos; i < current_size; ++i) {
       new_ija[i+n] = old_ija[i];
     }
   } else {
-    for (size_t i = pos; i < current_size - pos + n - 1; ++i) {
+    for (size_t i = pos; i < current_size; ++i) {
       new_ija[i+n] = old_ija[i];
       new_a[i+n] = old_a[i];
     }
@@ -964,11 +999,13 @@ static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t po
  *	question.)
  */
 template <typename DType, typename IType>
-static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, DType* val, size_t n, bool struct_only) {
+static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void* val_, size_t n, bool struct_only) {
   if (pos < s->shape[0]) {
-    rb_raise(rb_eArgError, "vector insert pos is before beginning of ja; this should not happen");
+    rb_raise(rb_eArgError, "vector insert pos (%d) is before beginning of ja (%d); this should not happen", pos, s->shape[0]);
   }
+  DType* val = reinterpret_cast<DType*>(val_);
   size_t size = get_size<IType>(s);
   IType* ija = reinterpret_cast<IType*>(s->ija);
@@ -1098,6 +1135,7 @@ static inline size_t get_size(const YALE_STORAGE* storage) {
   return static_cast<size_t>(reinterpret_cast<IType*>(storage->ija)[ storage->shape[0] ]);
 }
 /*
  * Allocate for a copy or copy-cast operation, and copy the IJA portion of the
  * matrix (the structure).
@@ -1125,7 +1163,7 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
 }
 template <typename DType, typename IType>
-static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
+static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector, nm::itype_t result_itype) {
   YALE_STORAGE *left  = (YALE_STORAGE*)(casted_storage.left),
                *right = (YALE_STORAGE*)(casted_storage.right);
@@ -1133,24 +1171,49 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
   // same for left and right.
   // int8_t dtype = left->dtype;
+  // Massage the IType arrays into the correct form.
+  IType* ijl;
+  if (left->itype == result_itype) ijl = reinterpret_cast<IType*>(left->ija);
+  else {  // make a temporary copy of the IJA vector for L with the correct itype
+    std::cerr << "changing left itype from " << static_cast<uint8_t>(left->itype) << " to " << static_cast<int8_t>(result_itype) << std::endl;
+    size_t length = nm_yale_storage_get_size(left);
+    std::cerr << "length = " << length << std::endl;
+    ijl = ALLOCA_N(IType, length);
+    copy_recast_itype_vector(reinterpret_cast<void*>(left->ija), left->itype, reinterpret_cast<void*>(ijl), result_itype, length);
+  }
+  IType* ijr;
+  if (right->itype == result_itype) ijr = reinterpret_cast<IType*>(right->ija);
+  else {  // make a temporary copy of the IJA vector for R with the correct itype
+    std::cerr << "changing right itype from " << static_cast<uint8_t>(right->itype) << " to " << static_cast<int8_t>(result_itype) << std::endl;
+    size_t length = nm_yale_storage_get_size(right);
+    std::cerr << "length = " << length << std::endl;
+    ijr = ALLOCA_N(IType, length);
+    copy_recast_itype_vector(reinterpret_cast<void*>(right->ija), right->itype, reinterpret_cast<void*>(ijr), result_itype, length);
+  }
+  // First, count the ndnz of the result.
+  // TODO: This basically requires running symbmm twice to get the exact ndnz size. That's frustrating. Are there simple
+  // cases where we can avoid running it?
+  size_t result_ndnz = nm::math::symbmm<IType>(resulting_shape[0], left->shape[1], resulting_shape[1], ijl, ijl, true, ijr, ijr, true, NULL, true);
   // Create result storage.
-  nm::itype_t result_itype = static_cast<uint8_t>(left->itype) < static_cast<uint8_t>(right->itype) ? right->itype : left->itype;
-  YALE_STORAGE* result = nm_yale_storage_create(left->dtype, resulting_shape, 2, left->capacity + right->capacity, result_itype);
+  YALE_STORAGE* result = nm_yale_storage_create(left->dtype, resulting_shape, 2, result_ndnz, result_itype);
   init<DType,IType>(result);
-  IType* ijl = reinterpret_cast<IType*>(left->ija);
-  IType* ijr = reinterpret_cast<IType*>(right->ija);
   IType* ija = reinterpret_cast<IType*>(result->ija);
   // Symbolic multiplication step (build the structure)
-  nm::math::symbmm<IType>(result->shape[0], result->shape[1], ijl, ijl, true, ijr, ijr, true, ija, true);
+  nm::math::symbmm<IType>(resulting_shape[0], left->shape[1], resulting_shape[1], ijl, ijl, true, ijr, ijr, true, ija, true);
   // Numeric multiplication step (fill in the elements)
-  nm::math::numbmm<DType,IType>(result->shape[0], result->shape[1],
+  nm::math::numbmm<DType,IType>(result->shape[0], left->shape[1], result->shape[1],
                                 ijl, ijl, reinterpret_cast<DType*>(left->a), true,
                                 ijr, ijr, reinterpret_cast<DType*>(right->a), true,
                                 ija, ija, reinterpret_cast<DType*>(result->a), true);
   // Sort the columns
   nm::math::smmp_sort_columns<DType,IType>(result->shape[0], ija, ija, reinterpret_cast<DType*>(result->a));
@@ -1273,13 +1336,17 @@ void nm_init_yale_functions() {
 	 */
   cNMatrix_YaleFunctions = rb_define_module_under(cNMatrix, "YaleFunctions");
-  rb_define_method(cNMatrix_YaleFunctions, "yale_ija", (METHOD)nm_ija, 0);
-  rb_define_method(cNMatrix_YaleFunctions, "yale_a", (METHOD)nm_a, 0);
+  rb_define_method(cNMatrix_YaleFunctions, "yale_ija", (METHOD)nm_ija, -1);
+  rb_define_method(cNMatrix_YaleFunctions, "yale_a", (METHOD)nm_a, -1);
   rb_define_method(cNMatrix_YaleFunctions, "yale_size", (METHOD)nm_size, 0);
   rb_define_method(cNMatrix_YaleFunctions, "yale_ia", (METHOD)nm_ia, 0);
   rb_define_method(cNMatrix_YaleFunctions, "yale_ja", (METHOD)nm_ja, 0);
-  rb_define_method(cNMatrix_YaleFunctions, "yale_d", (METHOD)nm_d, 0);
+  rb_define_method(cNMatrix_YaleFunctions, "yale_d", (METHOD)nm_d, -1);
   rb_define_method(cNMatrix_YaleFunctions, "yale_lu", (METHOD)nm_lu, 0);
+  rb_define_method(cNMatrix_YaleFunctions, "yale_nd_row", (METHOD)nm_nd_row, -1);
+  rb_define_method(cNMatrix_YaleFunctions, "yale_vector_insert", (METHOD)nm_vector_insert, -1);
   rb_define_const(cNMatrix_YaleFunctions, "YALE_GROWTH_CONSTANT", rb_float_new(nm::yale_storage::GROWTH_CONSTANT));
 }
@@ -1325,6 +1392,25 @@ void* nm_yale_storage_get(STORAGE* storage, SLICE* slice) {
   return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
 }
+/*
+ * C accessor for yale_storage::vector_insert
+ */
+static char nm_yale_storage_vector_insert(YALE_STORAGE* s, size_t pos, size_t* js, void* vals, size_t n, bool struct_only, nm::dtype_t dtype, nm::itype_t itype) {
+  NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::vector_insert, char, YALE_STORAGE*, size_t, size_t*, void*, size_t, bool);
+  return ttable[dtype][itype](s, pos, js, vals, n, struct_only);
+}
+/*
+ * C accessor for yale_storage::increment_ia_after, typically called after ::vector_insert
+ */
+static void nm_yale_storage_increment_ia_after(YALE_STORAGE* s, size_t ija_size, size_t i, size_t n, nm::itype_t itype) {
+  NAMED_ITYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::c_increment_ia_after, void, YALE_STORAGE*, size_t, size_t, size_t);
+  ttable[itype](s, ija_size, i, n);
+}
 /*
  * C accessor for yale_storage::ref, which returns a pointer to the correct location in a YALE_STORAGE object
  * for some set of coordinates.
@@ -1403,14 +1489,21 @@ STORAGE* nm_yale_storage_copy_transposed(const STORAGE* rhs_base) {
 /*
  * C accessor for multiplying two YALE_STORAGE matrices, which have already been casted to the same dtype.
  *
- * FIXME: What happens if the two matrices have different itypes?
+ * FIXME: There should be some mathematical way to determine the worst-case IType based on the input ITypes. Right now
+ * it just uses the default.
  */
 STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
-  LI_DTYPE_TEMPLATE_TABLE(nm::yale_storage::matrix_multiply, STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
+  LI_DTYPE_TEMPLATE_TABLE(nm::yale_storage::matrix_multiply, STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector, nm::itype_t resulting_itype);
-  YALE_STORAGE* storage_access = (YALE_STORAGE*)(casted_storage.left);
+  YALE_STORAGE* left = reinterpret_cast<YALE_STORAGE*>(casted_storage.left);
+  YALE_STORAGE* right = reinterpret_cast<YALE_STORAGE*>(casted_storage.right);
-  return ttable[storage_access->dtype][storage_access->itype](casted_storage, resulting_shape, vector);
+  // Determine the itype for the matrix that will be returned.
+  nm::itype_t itype = nm_yale_storage_itype_by_shape(resulting_shape),
+              max_itype = NM_MAX_ITYPE(left->itype, right->itype);
+  if (static_cast<int8_t>(itype) < static_cast<int8_t>(max_itype)) itype = max_itype;
+  return ttable[left->dtype][itype](casted_storage, resulting_shape, vector, itype);
 }
 /*
@@ -1545,6 +1638,7 @@ void nm_yale_storage_init(YALE_STORAGE* s) {
   ttable[s->dtype][s->itype](s);
 }
 /*
  * Ruby GC mark function for YALE_STORAGE. C accessible.
  */
@@ -1615,42 +1709,64 @@ static VALUE nm_size(VALUE self) {
 /*
  * call-seq:
  *     yale_a -> Array
+ *     yale_a(index) -> ...
  *
  * Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
  */
-static VALUE nm_a(VALUE self) {
-  YALE_STORAGE* s = NM_STORAGE_YALE(self);
+static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
+  VALUE idx;
+  rb_scan_args(argc, argv, "01", &idx);
+  YALE_STORAGE* s = NM_STORAGE_YALE(self);
   size_t size = nm_yale_storage_get_size(s);
-  VALUE* vals = ALLOCA_N(VALUE, size);
-  for (size_t i = 0; i < size; ++i) {
-    vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
-  }
-  VALUE ary = rb_ary_new4(size, vals);
+  if (idx == Qnil) {
+    VALUE* vals = ALLOCA_N(VALUE, size);
-  for (size_t i = size; i < s->capacity; ++i)
-    rb_ary_push(ary, Qnil);
+    for (size_t i = 0; i < size; ++i) {
+      vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+    }
+    VALUE ary = rb_ary_new4(size, vals);
-  return ary;
+    for (size_t i = size; i < s->capacity; ++i)
+      rb_ary_push(ary, Qnil);
+    return ary;
+  } else {
+    size_t index = FIX2INT(idx);
+    if (index >= size) rb_raise(rb_eRangeError, "out of range");
+    return rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * index, s->dtype).rval;
+  }
 }
 /*
  * call-seq:
  *     yale_d -> Array
+ *     yale_d(index) -> ...
  *
  * Get the diagonal ("D") portion of the A array of a Yale matrix.
  */
-static VALUE nm_d(VALUE self) {
+static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
+  VALUE idx;
+  rb_scan_args(argc, argv, "01", &idx);
   YALE_STORAGE* s = NM_STORAGE_YALE(self);
-  VALUE* vals = ALLOCA_N(VALUE, s->shape[0]);
+  if (idx == Qnil) {
+    VALUE* vals = ALLOCA_N(VALUE, s->shape[0]);
+    for (size_t i = 0; i < s->shape[0]; ++i) {
+      vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+    }
+    return rb_ary_new4(s->shape[0], vals);
+  } else {
+    size_t index = FIX2INT(idx);
+    if (index >= s->shape[0]) rb_raise(rb_eRangeError, "out of range");
-  for (size_t i = 0; i < s->shape[0]; ++i) {
-    vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
+    return rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * index, s->dtype).rval;
   }
-  return rb_ary_new4(s->shape[0], vals);
 }
 /*
@@ -1726,26 +1842,164 @@ static VALUE nm_ja(VALUE self) {
 /*
  * call-seq:
  *     yale_ija -> Array
+ *     yale_ija(index) -> ...
  *
- * Get the IJA array of a Yale matrix.
+ * Get the IJA array of a Yale matrix (or a component of the IJA array).
  */
-static VALUE nm_ija(VALUE self) {
-  YALE_STORAGE* s = NM_STORAGE_YALE(self);
+static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
+  VALUE idx;
+  rb_scan_args(argc, argv, "01", &idx);
+  YALE_STORAGE* s = NM_STORAGE_YALE(self);
   size_t size = nm_yale_storage_get_size(s);
-  VALUE* vals = ALLOCA_N(VALUE, size);
+  if (idx == Qnil) {
-  for (size_t i = 0; i < size; ++i) {
-    vals[i] = rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*i, s->itype).rval;
+    VALUE* vals = ALLOCA_N(VALUE, size);
+    for (size_t i = 0; i < size; ++i) {
+      vals[i] = rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*i, s->itype).rval;
+    }
+   VALUE ary = rb_ary_new4(size, vals);
+    for (size_t i = size; i < s->capacity; ++i)
+      rb_ary_push(ary, Qnil);
+    return ary;
+  } else {
+    size_t index = FIX2INT(idx);
+    if (index >= size) rb_raise(rb_eRangeError, "out of range");
+    return rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype] * index, s->itype).rval;
   }
+}
- VALUE ary = rb_ary_new4(size, vals);
-  for (size_t i = size; i < s->capacity; ++i)
-    rb_ary_push(ary, Qnil);
+/*
+ * call-seq:
+ *     yale_nd_row(i, as = :hash) -> Hash or Array
+ *
+ * This function gets the non-diagonal contents of a Yale matrix row.
+ * The first argument should be the row index. The optional second argument may be :hash or :array, but defaults
+ * to :hash. If :array is given, it will only return the Hash keys (the column indices).
+ *
+ * This function is meant to accomplish its purpose as efficiently as possible. It does not check for appropriate
+ * range.
+ *
+ * FIXME: :array doesn't make sense. This should be :keys or :values to indicate which array we want.
+ */
+static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
+  VALUE i_, as;
+  rb_scan_args(argc, argv, "11", &i_, &as);
-  return ary;
+  bool array = false;
+  if (as != Qnil && rb_to_id(as) != nm_rb_hash) array = true;
+  size_t i = FIX2INT(i_);
+  YALE_STORAGE* s   = NM_STORAGE_YALE(self);
+  nm::dtype_t dtype = NM_DTYPE(self);
+  nm::itype_t itype = NM_ITYPE(self);
+  // get the position as a size_t
+  // TODO: Come up with a faster way to get this than transforming to a Ruby object first.
+  size_t pos = FIX2INT(rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[itype]*i, itype).rval);
+  size_t nextpos = FIX2INT(rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[itype]*(i+1), itype).rval);
+  size_t diff = nextpos - pos;
+  //std::cerr << "diff = " << diff << "\tpos = " << pos << "\tnextpos = " << nextpos << std::endl;
+  VALUE ret; // HERE
+  if (array) {
+    ret = rb_ary_new3(diff);
+    for (size_t idx = pos; idx < nextpos; ++idx) {
+      rb_ary_store(ret, idx - pos, rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*idx, s->itype).rval);
+    }
+  } else {
+    ret = rb_hash_new();
+    for (size_t idx = pos; idx < nextpos; ++idx) {
+      rb_hash_aset(ret, rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*idx, s->itype).rval,
+                        rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*idx, s->dtype).rval);
+    }
+  }
+  return ret;
+}
+/*
+ * call-seq:
+ *     yale_vector_insert -> Fixnum
+ *
+ * Insert at position pos an array of non-diagonal elements with column indices given. Note that the column indices and values
+ * must be storage-contiguous -- that is, you can't insert them around existing elements in some row, only amid some
+ * elements in some row. You *can* insert them around a diagonal element, since this is stored separately. This function
+ * may not be used for the insertion of diagonal elements in most cases, as these are already present in the data
+ * structure and are typically modified by replacement rather than insertion.
+ *
+ * The last argument, pos, may be nil if you want to insert at the beginning of a row. Otherwise it needs to be provided.
+ * Don't expect this function to know the difference. It really does very little checking, because its goal is to make
+ * multiple contiguous insertion as quick as possible.
+ *
+ * You should also not attempt to insert values which are the default (0). These are not supposed to be stored, and may
+ * lead to undefined behavior.
+ *
+ * Example:
+ *    m.yale_vector_insert(3, [0,3,4], [1,1,1], 15)
+ *
+ * The example above inserts the values 1, 1, and 1 in columns 0, 3, and 4, assumed to be located at position 15 (which
+ * corresponds to row 3).
+ *
+ * Example:
+ *    next = m.yale_vector_insert(3, [0,3,4], [1,1,1])
+ *
+ * This example determines that i=3 is at position 15 automatically. The value returned, next, is the position where the
+ * next value(s) should be inserted.
+ */
+static VALUE nm_vector_insert(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) {
+  // i, jv, vv are mandatory; pos is optional; thus "31"
+  VALUE i_, jv, vv, pos_;
+  rb_scan_args(argc, argv, "31", &i_, &jv, &vv, &pos_);
+  size_t len   = RARRAY_LEN(jv); // need length in order to read the arrays in
+  size_t vvlen = RARRAY_LEN(vv);
+  if (len != vvlen)
+    rb_raise(rb_eArgError, "lengths must match between j array (%d) and value array (%d)", len, vvlen);
+  YALE_STORAGE* s   = NM_STORAGE_YALE(self);
+  nm::dtype_t dtype = NM_DTYPE(self);
+  nm::itype_t itype = NM_ITYPE(self);
+  size_t i   = FIX2INT(i_);    // get the row
+  // get the position as a size_t
+  // TODO: Come up with a faster way to get this than transforming to a Ruby object first.
+  if (pos_ == Qnil) pos_ = rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[itype]*i, itype).rval;
+  size_t pos = FIX2INT(pos_);
+  // Allocate the j array and the values array
+  size_t* j  = ALLOCA_N(size_t, len);
+  void* vals = ALLOCA_N(char, DTYPE_SIZES[dtype] * len);
+  // Copy array contents
+  for (size_t idx = 0; idx < len; ++idx) {
+    j[idx] = FIX2INT(rb_ary_entry(jv, idx));
+    rubyval_to_cval(rb_ary_entry(vv, idx), dtype, (char*)vals + idx * DTYPE_SIZES[dtype]);
+  }
+  char ins_type = nm_yale_storage_vector_insert(s, pos, j, vals, len, false, dtype, itype);
+  nm_yale_storage_increment_ia_after(s, s->shape[0], i, len, itype);
+  s->ndnz += len;
+  // Return the updated position
+  pos += len;
+  return INT2FIX(pos);
 }
 } // end of extern "C" block