RubyGems - nmatrix-lapacke - Versions diffs - 0.2.0 - Mend

nmatrix-lapacke 0.2.0

Files changed (185) hide show

checksums.yaml +7 -0
data/ext/nmatrix/data/complex.h +364 -0
data/ext/nmatrix/data/data.h +638 -0
data/ext/nmatrix/data/meta.h +64 -0
data/ext/nmatrix/data/ruby_object.h +389 -0
data/ext/nmatrix/math/asum.h +120 -0
data/ext/nmatrix/math/cblas_enums.h +36 -0
data/ext/nmatrix/math/cblas_templates_core.h +507 -0
data/ext/nmatrix/math/gemm.h +241 -0
data/ext/nmatrix/math/gemv.h +178 -0
data/ext/nmatrix/math/getrf.h +255 -0
data/ext/nmatrix/math/getrs.h +121 -0
data/ext/nmatrix/math/imax.h +79 -0
data/ext/nmatrix/math/laswp.h +165 -0
data/ext/nmatrix/math/long_dtype.h +49 -0
data/ext/nmatrix/math/math.h +744 -0
data/ext/nmatrix/math/nrm2.h +160 -0
data/ext/nmatrix/math/rot.h +117 -0
data/ext/nmatrix/math/rotg.h +106 -0
data/ext/nmatrix/math/scal.h +71 -0
data/ext/nmatrix/math/trsm.h +332 -0
data/ext/nmatrix/math/util.h +148 -0
data/ext/nmatrix/nm_memory.h +60 -0
data/ext/nmatrix/nmatrix.h +408 -0
data/ext/nmatrix/ruby_constants.h +106 -0
data/ext/nmatrix/storage/common.h +176 -0
data/ext/nmatrix/storage/dense/dense.h +128 -0
data/ext/nmatrix/storage/list/list.h +137 -0
data/ext/nmatrix/storage/storage.h +98 -0
data/ext/nmatrix/storage/yale/class.h +1139 -0
data/ext/nmatrix/storage/yale/iterators/base.h +142 -0
data/ext/nmatrix/storage/yale/iterators/iterator.h +130 -0
data/ext/nmatrix/storage/yale/iterators/row.h +449 -0
data/ext/nmatrix/storage/yale/iterators/row_stored.h +139 -0
data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +168 -0
data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +123 -0
data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
data/ext/nmatrix/storage/yale/yale.h +202 -0
data/ext/nmatrix/types.h +54 -0
data/ext/nmatrix/util/io.h +115 -0
data/ext/nmatrix/util/sl_list.h +143 -0
data/ext/nmatrix/util/util.h +78 -0
data/ext/nmatrix_lapacke/extconf.rb +200 -0
data/ext/nmatrix_lapacke/lapacke.cpp +100 -0
data/ext/nmatrix_lapacke/lapacke/include/lapacke.h +16445 -0
data/ext/nmatrix_lapacke/lapacke/include/lapacke_config.h +119 -0
data/ext/nmatrix_lapacke/lapacke/include/lapacke_mangling.h +17 -0
data/ext/nmatrix_lapacke/lapacke/include/lapacke_mangling_with_flags.h +17 -0
data/ext/nmatrix_lapacke/lapacke/include/lapacke_utils.h +579 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev.c +89 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev_work.c +141 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd.c +106 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd_work.c +158 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd.c +94 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd_work.c +149 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf.c +51 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf_work.c +83 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri.c +77 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri_work.c +89 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs.c +56 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs_work.c +102 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf_work.c +82 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri_work.c +82 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs.c +55 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs_work.c +101 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev.c +78 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev_work.c +136 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd.c +88 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd_work.c +153 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd.c +83 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd_work.c +144 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf_work.c +81 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri.c +75 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri_work.c +87 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs.c +55 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs_work.c +99 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf_work.c +81 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri_work.c +81 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs.c +54 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs_work.c +97 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev.c +78 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev_work.c +134 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd.c +88 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd_work.c +152 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd.c +83 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd_work.c +143 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf_work.c +81 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri.c +75 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri_work.c +87 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs.c +55 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs_work.c +99 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf_work.c +81 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotri.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotri_work.c +81 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs.c +54 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs_work.c +97 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev.c +89 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev_work.c +141 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd.c +106 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd_work.c +158 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd.c +94 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd_work.c +149 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf.c +51 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf_work.c +83 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri.c +77 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri_work.c +89 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs.c +56 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs_work.c +102 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf_work.c +82 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri.c +50 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri_work.c +82 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs.c +55 -0
data/ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs_work.c +101 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_nancheck.c +62 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_trans.c +65 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_nancheck.c +43 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_trans.c +45 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_nancheck.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_trans.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_nancheck.c +62 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_trans.c +65 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_nancheck.c +43 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_trans.c +45 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_nancheck.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_trans.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_lsame.c +41 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_nancheck.c +62 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_trans.c +65 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_nancheck.c +43 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_trans.c +45 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_str_nancheck.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_str_trans.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_xerbla.c +46 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_nancheck.c +62 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_trans.c +65 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_nancheck.c +43 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_trans.c +45 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_nancheck.c +85 -0
data/ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_trans.c +85 -0
data/ext/nmatrix_lapacke/lapacke_nmatrix.h +16 -0
data/ext/nmatrix_lapacke/make_lapacke_cpp.rb +9 -0
data/ext/nmatrix_lapacke/math_lapacke.cpp +967 -0
data/ext/nmatrix_lapacke/math_lapacke/cblas_local.h +576 -0
data/ext/nmatrix_lapacke/math_lapacke/cblas_templates_lapacke.h +51 -0
data/ext/nmatrix_lapacke/math_lapacke/lapacke_templates.h +356 -0
data/ext/nmatrix_lapacke/nmatrix_lapacke.cpp +42 -0
data/lib/nmatrix/lapack_ext_common.rb +69 -0
data/lib/nmatrix/lapacke.rb +213 -0
data/spec/00_nmatrix_spec.rb +730 -0
data/spec/01_enum_spec.rb +190 -0
data/spec/02_slice_spec.rb +389 -0
data/spec/03_nmatrix_monkeys_spec.rb +78 -0
data/spec/2x2_dense_double.mat +0 -0
data/spec/4x4_sparse.mat +0 -0
data/spec/4x5_dense.mat +0 -0
data/spec/blas_spec.rb +193 -0
data/spec/elementwise_spec.rb +303 -0
data/spec/homogeneous_spec.rb +99 -0
data/spec/io/fortran_format_spec.rb +88 -0
data/spec/io/harwell_boeing_spec.rb +98 -0
data/spec/io/test.rua +9 -0
data/spec/io_spec.rb +149 -0
data/spec/lapack_core_spec.rb +482 -0
data/spec/leakcheck.rb +16 -0
data/spec/math_spec.rb +730 -0
data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
data/spec/nmatrix_yale_spec.rb +286 -0
data/spec/plugins/lapacke/lapacke_spec.rb +303 -0
data/spec/rspec_monkeys.rb +56 -0
data/spec/rspec_spec.rb +34 -0
data/spec/shortcuts_spec.rb +310 -0
data/spec/slice_set_spec.rb +157 -0
data/spec/spec_helper.rb +140 -0
data/spec/stat_spec.rb +203 -0
data/spec/test.pcd +20 -0
data/spec/utm5940.mtx +83844 -0
metadata +262 -0

@@ -0,0 +1,121 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == getrs.h
+//
+// getrs function in native C++.
+//
+/*
+ *             Automatically Tuned Linear Algebra Software v3.8.4
+ *                    (C) Copyright 1999 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions, and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *   3. The name of the ATLAS group or the names of its contributers may
+ *      not be used to endorse or promote products derived from this
+ *      software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef GETRS_H
+#define GETRS_H
+namespace nm { namespace math {
+/*
+ * Solves a system of linear equations A*X = B with a general NxN matrix A using the LU factorization computed by GETRF.
+ *
+ * From ATLAS 3.8.0.
+ *
+ * Order and Trans select one of four sequences, each made of one laswp call (row interchanges on B
+ * driven by ipiv) and two trsm calls against A. NRHS is the number of right-hand sides. B is the
+ * in/out argument handed to laswp and trsm. The function returns 0 on every path; an empty problem
+ * (N == 0 or NRHS == 0) returns immediately without touching B.
+ */
+template <typename DType>
+int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const DType* A,
+           const int lda, const int* ipiv, DType* B, const int ldb)
+{
+  // Nothing to solve without equations or right-hand sides.
+  if (N == 0 || NRHS == 0) return 0;
+  const DType unit_alpha  = 1; // scaling factor handed to every trsm call
+  const bool col_major    = (Order == CblasColMajor);
+  const bool no_transpose = (Trans == CblasNoTrans);
+  if (col_major && no_transpose) {
+    // Interchange rows first, then the unit-lower solve followed by the non-unit-upper solve.
+    nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, 1);
+    nm::math::trsm<DType>(Order, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N, NRHS, unit_alpha, A, lda, B, ldb);
+    nm::math::trsm<DType>(Order, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, NRHS, unit_alpha, A, lda, B, ldb);
+    return 0;
+  }
+  if (col_major) {
+    // Transposed system: upper solve, lower solve, then the interchanges in reverse order.
+    nm::math::trsm<DType>(Order, CblasLeft, CblasUpper, Trans, CblasNonUnit, N, NRHS, unit_alpha, A, lda, B, ldb);
+    nm::math::trsm<DType>(Order, CblasLeft, CblasLower, Trans, CblasUnit, N, NRHS, unit_alpha, A, lda, B, ldb);
+    nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, -1);
+    return 0;
+  }
+  if (no_transpose) {
+    // Row-major, untransposed: right-side solves with the dimensions passed as (NRHS, N).
+    nm::math::trsm<DType>(Order, CblasRight, CblasLower, CblasTrans, CblasNonUnit, NRHS, N, unit_alpha, A, lda, B, ldb);
+    nm::math::trsm<DType>(Order, CblasRight, CblasUpper, CblasTrans, CblasUnit, NRHS, N, unit_alpha, A, lda, B, ldb);
+    nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, -1);
+    return 0;
+  }
+  // Row-major, transposed.
+  nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, 1);
+  nm::math::trsm<DType>(Order, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, NRHS, N, unit_alpha, A, lda, B, ldb);
+  nm::math::trsm<DType>(Order, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, NRHS, N, unit_alpha, A, lda, B, ldb);
+  return 0;
+}
+/*
+* Type-erased entry point for getrs: accepts the matrices as void pointers, reinterprets them as
+* arrays of DType, and forwards every other argument unchanged. Returns whatever getrs returns.
+*
+* For documentation of the LAPACK routine this mirrors: http://www.netlib.org/lapack/double/dgetrs.f
+*
+* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
+*/
+template <typename DType>
+inline int clapack_getrs(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const int n, const int nrhs,
+                         const void* a, const int lda, const int* ipiv, void* b, const int ldb) {
+  const DType* const factored = reinterpret_cast<const DType*>(a);
+  DType* const       rhs      = reinterpret_cast<DType*>(b);
+  return getrs<DType>(order, trans, n, nrhs, factored, lda, ipiv, rhs, ldb);
+}
+} } // end nm::math
+#endif // GETRS_H

data/ext/nmatrix/math/imax.h ADDED

@@ -0,0 +1,79 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == imax.h
+//
+// BLAS level 1 function imax.
+//
+#ifndef IMAX_H
+#define IMAX_H
+namespace nm { namespace math {
+/*
+ * Finds the position of the element with the largest absolute value among n elements of x
+ * visited with stride incx (x[0], x[incx], x[2*incx], ...).
+ *
+ * Returns the 0-based position in visiting order (not the raw offset into x). Ties keep the
+ * earliest position because the comparison is strict. Returns -1 when n < 1 or incx <= 0, and 0
+ * without reading x when n == 1.
+ */
+template<typename DType>
+inline int imax(const int n, const DType *x, const int incx) {
+  if (n < 1 || incx <= 0) {
+    return -1;
+  }
+  if (n == 1) {
+    return 0;
+  }
+  // Always use std::abs: an unqualified abs() can bind to the C int overload and truncate
+  // floating-point magnitudes, which would corrupt the starting maximum.
+  DType dmax = std::abs(x[0]);
+  int best = 0;
+  // One loop covers every positive stride; with incx == 1 the offset ix simply equals i.
+  for (int i = 1, ix = incx; i < n; ++i, ix += incx) {
+    if (std::abs(x[ix]) > dmax) {
+      best = i;
+      dmax = std::abs(x[ix]);
+    }
+  }
+  return best;
+}
+// Type-erased entry point for imax: reinterprets x as an array of DType and forwards n and incx
+// unchanged, returning imax's result.
+template<typename DType>
+inline int cblas_imax(const int n, const void* x, const int incx) {
+  const DType* const elements = reinterpret_cast<const DType*>(x);
+  return imax<DType>(n, elements, incx);
+}
+}} // end of namespace nm::math
+#endif /* IMAX_H */

data/ext/nmatrix/math/laswp.h ADDED

@@ -0,0 +1,165 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == laswp.h
+//
+// laswp function in native C++.
+//
+/*
+ *             Automatically Tuned Linear Algebra Software v3.8.4
+ *                    (C) Copyright 1999 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions, and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *   3. The name of the ATLAS group or the names of its contributers may
+ *      not be used to endorse or promote products derived from this
+ *      software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef LASWP_H
+#define LASWP_H
+namespace nm { namespace math {
+/*
+ * ATLAS function which performs row interchanges on a general rectangular matrix. Modeled after the LAPACK LASWP function.
+ *
+ * This version is templated for use by template <> getrf().
+ *
+ * A is addressed as A[row + col*lda]; N is the number of columns to process. For each row i in
+ * [K1, K2) the pivot entry for that row is read from piv and, when it differs from i, rows i and
+ * piv-entry are exchanged across all N columns. Pivot entries are 0-based row indices.
+ *
+ * inci is the stride through piv. A positive inci walks rows K1 .. K2-1 upwards; a negative inci
+ * walks them from K2-1 down to K1, reading piv backwards from offset (K2-1)*|inci|.
+ *
+ * Columns are handled in panels of 32 (plus one narrower final panel), with the whole pivot
+ * sequence applied to one panel before moving to the next.
+ */
+template <typename DType>
+inline void laswp(const int N, DType* A, const int lda, const int K1, const int K2, const int *piv, const int inci) {
+  // An empty row range means nothing to interchange. The do/while below always executes once, so
+  // K2 == K1 has to be rejected here too; otherwise a row outside [K1, K2) would be processed.
+  if (K2 <= K1) return;
+  const int panel_width  = 32;
+  const int panel_stride = lda << 5; // elements between the first columns of consecutive panels
+  int first_row, last_row;
+  if (inci < 0) {
+    piv -= (K2-1) * inci;
+    first_row = K2 - 1;
+    last_row  = K1;
+  } else {
+    piv += K1 * inci;
+    first_row = K1;
+    last_row  = K2-1;
+  }
+  int cols_left = N;
+  while (cols_left > 0) {
+    const int width = (cols_left < panel_width) ? cols_left : panel_width;
+    const int* ipiv = piv;
+    int i           = first_row;
+    bool keep_on;
+    do {
+      const int ip = *ipiv; ipiv += inci;
+      if (ip != i) {
+        DType *a0 = &(A[i]),
+              *a1 = &(A[ip]);
+        // Exchange rows i and ip within this panel, one column at a time.
+        for (int h = width; h; h--) {
+          DType r   = *a0;
+          *a0       = *a1;
+          *a1       = r;
+          a0 += lda;
+          a1 += lda;
+        }
+      }
+      if (inci > 0) keep_on = (++i <= last_row);
+      else          keep_on = (--i >= last_row);
+    } while (keep_on);
+    cols_left -= width;
+    if (cols_left > 0) A += panel_stride; // advance only when another panel follows
+  }
+}
+/*
+* Type-erased entry point for laswp: reinterprets the matrix pointer as an array of DType and
+* forwards the remaining arguments unchanged (incx is passed as laswp's pivot stride).
+*
+* For documentation of the LAPACK routine this mirrors: http://www.netlib.org/lapack/double/dlaswp.f
+*
+* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
+*/
+template <typename DType>
+inline void clapack_laswp(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) {
+  DType* const matrix = reinterpret_cast<DType*>(a);
+  laswp<DType>(n, matrix, lda, k1, k2, ipiv, incx);
+}
+} }  // namespace nm::math
+#endif // LASWP_H

data/ext/nmatrix/math/long_dtype.h ADDED

@@ -0,0 +1,49 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == long_dtype.h
+//
+// Declarations necessary for the native versions of GEMM and GEMV.
+//
+#ifndef LONG_DTYPE_H
+#define LONG_DTYPE_H
+namespace nm { namespace math {
+  // These allow an increase in precision for intermediate values of gemm and gemv: LongDType<T>::type names the type mapped to element type T.
+  // See also: http://stackoverflow.com/questions/11873694/how-does-one-increase-precision-in-c-templates-in-a-template-typename-dependen
+  template <typename DType> struct LongDType; // primary template is only declared here, so a DType without a specialization below has no ::type
+  template <> struct LongDType<uint8_t> { typedef int16_t type; }; // unsigned 8-bit maps to signed 16-bit
+  template <> struct LongDType<int8_t> { typedef int16_t type; };
+  template <> struct LongDType<int16_t> { typedef int32_t type; };
+  template <> struct LongDType<int32_t> { typedef int64_t type; };
+  template <> struct LongDType<int64_t> { typedef int64_t type; }; // maps to itself: no wider type is used
+  template <> struct LongDType<float> { typedef double type; };
+  template <> struct LongDType<double> { typedef double type; }; // maps to itself
+  template <> struct LongDType<Complex64> { typedef Complex128 type; };
+  template <> struct LongDType<Complex128> { typedef Complex128 type; }; // maps to itself
+  template <> struct LongDType<RubyObject> { typedef RubyObject type; }; // maps to itself
+}} // end of namespace nm::math
+#endif

data/ext/nmatrix/math/math.h ADDED

@@ -0,0 +1,744 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == math.h
+//
+// Header file for math functions, interfacing with BLAS, etc.
+//
+// For instructions on adding CBLAS and CLAPACK functions, see the
+// beginning of math.cpp.
+//
+// Some of these functions are from ATLAS. Here is the license for
+// ATLAS:
+//
+/*
+ *             Automatically Tuned Linear Algebra Software v3.8.4
+ *                    (C) Copyright 1999 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions, and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *   3. The name of the ATLAS group or the names of its contributers may
+ *      not be used to endorse or promote products derived from this
+ *      software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef MATH_H
+#define MATH_H
+/*
+ * Standard Includes
+ */
+#include "cblas_enums.h"
+#include <algorithm> // std::min, std::max
+#include <limits> // std::numeric_limits
+#include <vector> // std::vector
+/*
+ * Project Includes
+ */
+/*
+ * Macros
+ */
+#define REAL_RECURSE_LIMIT 4
+/*
+ * Data
+ */
+extern "C" {
+  /*
+   * C accessors.
+   *
+   * Everything in this block has C linkage. Only prototypes appear here; the definitions live
+   * outside this header.
+   */
+  void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size); // NOTE(review): element_size is presumably the byte size of one element -- confirm against the definition
+  void nm_math_init_blas(void);
+  /*
+   * Pure math implementations.
+   *
+   * These take either Ruby VALUE handles or untyped element buffers; the buffer-based ones also
+   * take an nm::dtype_t tag. All return void, so any result is delivered through an argument
+   * (for example the non-const `result` and `B_elements` pointers).
+   */
+  void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv);
+  void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype); // A_elements is non-const; NOTE(review): presumably inverted in place -- confirm
+  void nm_math_hessenberg(VALUE a);
+  void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
+  void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
+}
+namespace nm {
+  namespace math {
+/*
+ * Types
+ */
+/*
+ * Functions
+ */
+// Yale: numeric matrix multiply c=a*b. Inputs arrive as (ia, ja, a) and (ib, jb, b); results are written to (ic, jc, c). The first n rows of a are walked. When diaga/diagb is set, a row's diagonal value is read from a[i] / b[j] after that row's stored entries. Off-diagonal results are appended to jc/c starting at offset n+1, and ic[i+1] is written for each row (ic[0] is not written here).
+template <typename DType>
+inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
+            const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
+  const unsigned int max_lmn = std::max(std::max(m, n), l); // both scratch arrays are sized to the largest of the three dimensions
+  IType next[max_lmn]; // per-row linked list of touched columns. NOTE(review): runtime-sized array (a compiler extension in C++), presumably stack-allocated -- confirm this is acceptable for large dimensions
+  DType sums[max_lmn]; // per-column accumulator for the row currently being computed (same runtime-sized-array caveat)
+  DType v; // value taken from a for the current (i, j) entry
+  IType head, length, temp, ndnz = 0; // ndnz counts the entries appended to jc/c so far
+  IType minmn = std::min(m,n); // rows at or beyond this skip the diagonal read from a
+  IType minlm = std::min(l,m); // rows at or beyond this skip the diagonal read from b
+  for (IType idx = 0; idx < max_lmn; ++idx) { // initialize scratch arrays
+    next[idx] = std::numeric_limits<IType>::max();
+    sums[idx] = 0;
+  }
+  for (IType i = 0; i < n; ++i) { // walk down the rows
+    head = std::numeric_limits<IType>::max()-1; // head gets assigned as whichever column of B's row j we last visited; max()-1 terminates the list and differs from the max() "not in list" marker
+    length = 0;
+    for (IType jj = ia[i]; jj <= ia[i+1]; ++jj) { // walk through entries in each row; the extra jj == ia[i+1] pass handles the diagonal
+      IType j;
+      if (jj == ia[i+1]) { // if we're in the last entry for this row:
+        if (!diaga || i >= minmn) continue;
+        j   = i;      // if it's a new Yale matrix, and last entry, get the diagonal position (j) and entry (ajj)
+        v   = a[i];
+      } else {
+        j   = ja[jj]; // if it's not the last entry for this row, get the column (j) and entry (ajj)
+        v   = a[jj];
+      }
+      for (IType kk = ib[j]; kk <= ib[j+1]; ++kk) { // same pattern over row j of b, again with one extra pass for its diagonal
+        IType k;
+        if (kk == ib[j+1]) { // Get the column id for that entry
+          if (!diagb || j >= minlm) continue;
+          k  = j;
+          sums[k] += v*b[k];
+        } else {
+          k  = jb[kk];
+          sums[k] += v*b[kk];
+        }
+        if (next[k] == std::numeric_limits<IType>::max()) { // column k is not yet in this row's list: push it on the front
+          next[k] = head;
+          head    = k;
+          ++length;
+        }
+      } // end of kk loop
+    } // end of jj loop
+    for (IType jj = 0; jj < length; ++jj) { // drain the list: emit each touched column, then reset its scratch slots for the next row
+      if (sums[head] != 0) { // zero sums are not stored
+        if (diagc && head == i) {
+          c[head] = sums[head]; // diagonal result is stored at its own index rather than appended
+        } else {
+          jc[n+1+ndnz] = head;
+          c[n+1+ndnz]  = sums[head];
+          ++ndnz;
+        }
+      }
+      temp = head;
+      head = next[head];
+      next[temp] = std::numeric_limits<IType>::max();
+      sums[temp] = 0;
+    }
+    ic[i+1] = n+1+ndnz; // offset just past the entries appended so far
+  }
+} /* numbmm_ */
+/*
+template <typename DType, typename IType>
+inline void new_yale_matrix_multiply(const unsigned int m, const IType* ija, const DType* a, const IType* ijb, const DType* b, YALE_STORAGE* c_storage) {
+  unsigned int n = c_storage->shape[0],
+               l = c_storage->shape[1];
+  // Create a working vector of dimension max(m,l,n) and initial value IType::max():
+  std::vector<IType> mask(std::max(std::max(m,l),n), std::numeric_limits<IType>::max());
+  for (IType i = 0; i < n; ++i) { // A.rows.each_index do |i|
+    IType j, k;
+    size_t ndnz;
+    for (IType jj = ija[i]; jj <= ija[i+1]; ++jj) { // walk through column pointers for row i of A
+      j = (jj == ija[i+1]) ? i : ija[jj];   // Get the current column index (handle diagonals last)
+      if (j >= m) {
+        if (j == ija[jj]) rb_raise(rb_eIndexError, "ija array for left-hand matrix contains an out-of-bounds column index %u at position %u", jj, j);
+        else              break;
+      }
+      for (IType kk = ijb[j]; kk <= ijb[j+1]; ++kk) { // walk through column pointers for row j of B
+        if (j >= m) continue; // first of all, does B *have* a row j?
+        k = (kk == ijb[j+1]) ? j : ijb[kk];   // Get the current column index (handle diagonals last)
+        if (k >= l) {
+          if (k == ijb[kk]) rb_raise(rb_eIndexError, "ija array for right-hand matrix contains an out-of-bounds column index %u at position %u", kk, k);
+          else              break;
+        }
+        if (mask[k] == )
+      }
+    }
+  }
+}
+*/
+// Yale: Symbolic matrix multiply c=a*b
+//
+// Walks the sparsity patterns (ia, ja) and (ib, jb) without touching any values and counts the
+// entries of the product pattern. When diaga/diagb is set, each row also contributes its diagonal
+// position after its stored column indices.
+//
+// ic may be null: in that case nothing is written and the function only counts. When ic is
+// supplied, ic[0] is set to n+1 (diagc) or 0, and ic[i+1] receives the running count after row i.
+//
+// Returns the final running count, which starts at n.
+inline size_t symbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const bool diaga,
+            const IType* ib, const IType* jb, const bool diagb, IType* ic, const bool diagc) {
+  const unsigned int max_lmn = std::max(std::max(m,n), l);
+  // INDEX in the SMMP paper. mask[k] holds the last row in which column k was seen, or max() if it
+  // has never been seen. Stored in a std::vector rather than a runtime-sized array, which is not
+  // standard C++ and would grow with the largest matrix dimension.
+  std::vector<IType> mask(max_lmn, std::numeric_limits<IType>::max());
+  IType j, k; /* Local variables */
+  size_t ndnz = n;
+  if (ic) { // Only write to ic if it's supplied; otherwise, we're just counting.
+    if (diagc)  ic[0] = n+1;
+    else        ic[0] = 0;
+  }
+  IType minmn = std::min(m,n);
+  IType minlm = std::min(l,m);
+  for (IType i = 0; i < n; ++i) { // MAIN LOOP: through rows
+    for (IType jj = ia[i]; jj <= ia[i+1]; ++jj) { // merge row lists, walking through columns in each row
+      // j <- column index given by JA[jj], or handle diagonal.
+      if (jj == ia[i+1]) { // Don't really do it the last time -- just handle diagonals in a new yale matrix.
+        if (!diaga || i >= minmn) continue;
+        j = i;
+      } else j = ja[jj];
+      for (IType kk = ib[j]; kk <= ib[j+1]; ++kk) { // Now walk through columns K of row J in matrix B.
+        if (kk == ib[j+1]) {
+          if (!diagb || j >= minlm) continue;
+          k = j;
+        } else k = jb[kk];
+        if (mask[k] != i) { // first time column k shows up in row i
+          mask[k] = i;
+          ++ndnz;
+        }
+      }
+    }
+    // NOTE(review): this adjustment fires when column i has never been seen in any row so far,
+    // not when it was seen in row i -- behaviour kept as is; confirm the intended condition.
+    if (diagc && mask[i] == std::numeric_limits<IType>::max()) --ndnz;
+    if (ic) ic[i+1] = ndnz;
+  }
+  return ndnz;
+} /* symbmm_ */
+// In-place quicksort (from Wikipedia) -- called by smmp_sort_columns, below. All functions are inclusive of left, right.
+// Every routine reorders a column-index array (`array`) and its parallel value array (`vals`) together.
+namespace smmp_sort {
+  const size_t THRESHOLD = 4;  // switch to insertion sort for 4 elements or fewer
+  // Debugging aid: writes "index:value" pairs for positions left..right to std::cerr.
+  template <typename DType>
+  void print_array(DType* vals, IType* array, IType left, IType right) {
+    for (IType i = left; i <= right; ++i) {
+      std::cerr << array[i] << ":" << vals[i] << "  ";
+    }
+    std::cerr << std::endl;
+  }
+  // Partitions positions left..right around the key stored at position `pivot`. On return, keys less
+  // than or equal to the pivot key precede the returned position, the pivot key sits at the returned
+  // position, and the remaining keys follow it.
+  template <typename DType>
+  IType partition(DType* vals, IType* array, IType left, IType right, IType pivot) {
+    IType pivotJ = array[pivot];
+    DType pivotV = vals[pivot];
+    // Park the pivot at the right edge while the rest of the range is scanned.
+    array[pivot] = array[right];
+    vals[pivot]  = vals[right];
+    array[right] = pivotJ;
+    vals[right]  = pivotV;
+    IType store = left;
+    for (IType idx = left; idx < right; ++idx) {
+      if (array[idx] <= pivotJ) {
+        // Swap idx and store
+        std::swap(array[idx], array[store]);
+        std::swap(vals[idx],  vals[store]);
+        ++store;
+      }
+    }
+    // Move the pivot from the right edge into its final position.
+    std::swap(array[store], array[right]);
+    std::swap(vals[store],  vals[right]);
+    return store;
+  }
+  // Returns the median of three values.
+  template <typename I>
+  inline I median(I a, I b, I c) {
+    if (a < b) {
+      if (b < c) return b; // a b c
+      if (a < c) return c; // a c b
+                 return a; // c a b
+    } else { // a >= b
+      if (a < c) return a; // b a c
+      if (b < c) return c; // b c a
+                 return b; // c b a
+    }
+  }
+  // Insertion sort is more efficient than quicksort for small N
+  template <typename DType>
+  void insertion_sort(DType* vals, IType* array, IType left, IType right) {
+    for (IType idx = left; idx <= right; ++idx) {
+      IType col_to_insert = array[idx];
+      DType val_to_insert = vals[idx];
+      IType hole_pos = idx;
+      for (; hole_pos > left && col_to_insert < array[hole_pos-1]; --hole_pos) {
+        array[hole_pos] = array[hole_pos - 1];  // shift the larger column index up
+        vals[hole_pos]  = vals[hole_pos - 1];   // value goes along with it
+      }
+      array[hole_pos] = col_to_insert;
+      vals[hole_pos]  = val_to_insert;
+    }
+  }
+  // Sorts positions left..right by key; ranges of at most THRESHOLD+1 positions go to insertion_sort.
+  template <typename DType>
+  void quicksort(DType* vals, IType* array, IType left, IType right) {
+    if (left < right) {
+      if (right - left < THRESHOLD) {
+        insertion_sort(vals, array, left, right);
+      } else {
+        // NOTE: median() is applied to the three *indices*, so with left < right this is the midpoint index.
+        IType pivot = median<IType>(left, right, (IType)(((unsigned long)left + (unsigned long)right) / 2));
+        pivot = partition(vals, array, left, right, pivot);
+        // Recursively sort elements smaller than the pivot. Skip the call when the pivot landed on
+        // `left`: the sub-range is empty, and pivot-1 would wrap around for an unsigned IType when
+        // left is 0, sending the recursion far out of bounds.
+        if (pivot > left) quicksort<DType>(vals, array, left, pivot-1);
+        // recursively sort elements at least as big as the pivot
+        quicksort<DType>(vals, array, pivot+1, right);
+      }
+    }
+  }
+} // end of namespace smmp_sort
+/*
+ * For use following symbmm and numbmm. Sorts the entries of each row by column index, moving the values in a
+ * along with the column indices in ja. Row i occupies positions ia[i] through ia[i+1]-1.
+ *
+ * Rows with fewer than two entries are skipped, rows with at most smmp_sort::THRESHOLD entries are handed to
+ * insertion sort, and longer rows go to quicksort. Quicksort is an in-place unstable sort (since there are no
+ * duplicate entries, we don't care about stability).
+ *
+ * TODO: It might be worthwhile to do a test for free memory, and if available, use an unstable sort that isn't in-place.
+ *
+ * TODO: It's actually probably possible to write an even faster sort, since symbmm/numbmm are not producing a random
+ * ordering. If someone is doing a lot of Yale matrix multiplication, it might benefit them to consider even insertion
+ * sort.
+ */
+template <typename DType>
+inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType* a) {
+  for (size_t row = 0; row < n; ++row) {
+    const IType first = ia[row];
+    const IType past  = ia[row+1];   // one past the row's last position
+    if (past - first < 2) continue;  // zero or one entry: nothing to reorder
+    if (past - first > smmp_sort::THRESHOLD) {
+      smmp_sort::quicksort<DType>(a, ja, first, past-1);      // faster for large rows (and may call insertion_sort as well)
+    } else {
+      smmp_sort::insertion_sort<DType>(a, ja, first, past-1); // faster for small rows
+    }
+  }
+}
+// Moves the strictly-upper part of a row-major array out of U and into C: for each of the first M rows,
+// the entries in columns row+1 .. N-1 are copied to the same positions of C and then set to zero in U.
+// The diagonal and everything below it are left alone in both arrays (U is unit, so its diagonal is not copied).
+// ldu and ldc are the row strides of U and C.
+//
+// From ATLAS 3.8.0.
+template <typename DType>
+static inline void trcpzeroU(const int M, const int N, DType* U, const int ldu, DType* C, const int ldc) {
+  DType* src_row = U;
+  DType* dst_row = C;
+  for (int row = 0; row != M; ++row, src_row += ldu, dst_row += ldc) {
+    for (int col = row + 1; col < N; ++col) {
+      dst_row[col] = src_row[col];
+      src_row[col] = 0;
+    }
+  }
+}
+/*
+ * Un-comment the following lines when we figure out how to calculate NB for each of the ATLAS-derived
+ * functions. This is probably really complicated.
+ *
+ * Also needed: ATL_MulByNB, ATL_DivByNB (both defined in the build process for ATLAS), and ATL_mmMU.
+ *
+ */
+/*
+template <bool RowMajor, bool Upper, typename DType>
+static int trtri_4(const enum CBLAS_DIAG Diag, DType* A, const int lda) {
+  if (RowMajor) {
+    DType *pA0 = A, *pA1 = A+lda, *pA2 = A+2*lda, *pA3 = A+3*lda;
+    DType tmp;
+    if (Upper) {
+      DType A01 = pA0[1], A02 = pA0[2], A03 = pA0[3],
+                          A12 = pA1[2], A13 = pA1[3],
+                                        A23 = pA2[3];
+      if (Diag == CblasNonUnit) {
+        pA0->inverse();
+        (pA1+1)->inverse();
+        (pA2+2)->inverse();
+        (pA3+3)->inverse();
+        pA0[1] = -A01 * pA1[1] * pA0[0];
+        pA1[2] = -A12 * pA2[2] * pA1[1];
+        pA2[3] = -A23 * pA3[3] * pA2[2];
+        pA0[2] = -(A01 * pA1[2] + A02 * pA2[2]) * pA0[0];
+        pA1[3] = -(A12 * pA2[3] + A13 * pA3[3]) * pA1[1];
+        pA0[3] = -(A01 * pA1[3] + A02 * pA2[3] + A03 * pA3[3]) * pA0[0];
+      } else {
+        pA0[1] = -A01;
+        pA1[2] = -A12;
+        pA2[3] = -A23;
+        pA0[2] = -(A01 * pA1[2] + A02);
+        pA1[3] = -(A12 * pA2[3] + A13);
+        pA0[3] = -(A01 * pA1[3] + A02 * pA2[3] + A03);
+      }
+    } else { // Lower
+      DType A10 = pA1[0],
+            A20 = pA2[0], A21 = pA2[1],
+            A30 = pA3[0], A31 = pA3[1], A32 = pA3[2];
+      DType *B10 = pA1,
+            *B20 = pA2,
+            *B30 = pA3,
+            *B21 = pA2+1,
+            *B31 = pA3+1,
+            *B32 = pA3+2;
+      if (Diag == CblasNonUnit) {
+        pA0->inverse();
+        (pA1+1)->inverse();
+        (pA2+2)->inverse();
+        (pA3+3)->inverse();
+        *B10 = -A10 * pA0[0] * pA1[1];
+        *B21 = -A21 * pA1[1] * pA2[2];
+        *B32 = -A32 * pA2[2] * pA3[3];
+        *B20 = -(A20 * pA0[0] + A21 * (*B10)) * pA2[2];
+        *B31 = -(A31 * pA1[1] + A32 * (*B21)) * pA3[3];
+        *B30 = -(A30 * pA0[0] + A31 * (*B10) + A32 * (*B20)) * pA3[3];
+      } else {
+        *B10 = -A10;
+        *B21 = -A21;
+        *B32 = -A32;
+        *B20 = -(A20 + A21 * (*B10));
+        *B31 = -(A31 + A32 * (*B21));
+        *B30 = -(A30 + A31 * (*B10) + A32 * (*B20));
+      }
+    }
+  } else {
+    rb_raise(rb_eNotImpError, "only row-major implemented at this time");
+  }
+  return 0;
+}
+template <bool RowMajor, bool Upper, typename DType>
+static int trtri_3(const enum CBLAS_DIAG Diag, DType* A, const int lda) {
+  if (RowMajor) {
+    DType tmp;
+    if (Upper) {
+      DType A01 = pA0[1], A02 = pA0[2], A03 = pA0[3],
+                          A12 = pA1[2], A13 = pA1[3];
+      DType *B01 = pA0 + 1,
+            *B02 = pA0 + 2,
+            *B12 = pA1 + 2;
+      if (Diag == CblasNonUnit) {
+        pA0->inverse();
+        (pA1+1)->inverse();
+        (pA2+2)->inverse();
+        *B01 = -A01 * pA1[1] * pA0[0];
+        *B12 = -A12 * pA2[2] * pA1[1];
+        *B02 = -(A01 * (*B12) + A02 * pA2[2]) * pA0[0];
+      } else {
+        *B01 = -A01;
+        *B12 = -A12;
+        *B02 = -(A01 * (*B12) + A02);
+      }
+    } else { // Lower
+      DType *pA0=A, *pA1=A+lda, *pA2=A+2*lda;
+      DType A10=pA1[0],
+            A20=pA2[0], A21=pA2[1];
+      DType *B10 = pA1,
+            *B20 = pA2,
+            *B21 = pA2+1;
+      if (Diag == CblasNonUnit) {
+        pA0->inverse();
+        (pA1+1)->inverse();
+        (pA2+2)->inverse();
+        *B10 = -A10 * pA0[0] * pA1[1];
+        *B21 = -A21 * pA1[1] * pA2[2];
+        *B20 = -(A20 * pA0[0] + A21 * (*B10)) * pA2[2];
+      } else {
+        *B10 = -A10;
+        *B21 = -A21;
+        *B20 = -(A20 + A21 * (*B10));
+      }
+    }
+  } else {
+    rb_raise(rb_eNotImpError, "only row-major implemented at this time");
+  }
+  return 0;
+}
+template <bool RowMajor, bool Upper, bool Real, typename DType>
+static void trtri(const enum CBLAS_DIAG Diag, const int N, DType* A, const int lda) {
+  DType *Age, *Atr;
+  DType tmp;
+  int Nleft, Nright;
+  int ierr = 0;
+  static const DType ONE = 1;
+  static const DType MONE = -1;
+  static const DType NONE = -1;
+  if (RowMajor) {
+    // FIXME: Use REAL_RECURSE_LIMIT here for float32 and float64 (instead of 1)
+    if ((Real && N > REAL_RECURSE_LIMIT) || (N > 1)) {
+      Nleft = N >> 1;
+#ifdef NB
+      if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
+#endif
+      Nright = N - Nleft;
+      if (Upper) {
+        Age = A + Nleft;
+        Atr = A + (Nleft * (lda+1));
+        nm::math::trsm<DType>(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, Diag,
+                              Nleft, Nright, ONE, Atr, lda, Age, lda);
+        nm::math::trsm<DType>(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, Diag,
+                              Nleft, Nright, MONE, A, lda, Age, lda);
+      } else { // Lower
+        Age = A + ((Nleft*lda));
+        Atr = A + (Nleft * (lda+1));
+        nm::math::trsm<DType>(CblasRowMajor, CblasRight, CblasLower, CblasNoTrans, Diag,
+                              Nright, Nleft, ONE, A, lda, Age, lda);
+        nm::math::trsm<DType>(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans, Diag,
+                              Nright, Nleft, MONE, Atr, lda, Age, lda);
+      }
+      ierr = trtri<RowMajor,Upper,Real,DType>(Diag, Nleft, A, lda);
+      if (ierr) return ierr;
+      ierr = trtri<RowMajor,Upper,Real,DType>(Diag, Nright, Atr, lda);
+      if (ierr) return ierr + Nleft;
+    } else {
+      if (Real) {
+        if (N == 4) {
+          return trtri_4<RowMajor,Upper,Real,DType>(Diag, A, lda);
+        } else if (N == 3) {
+          return trtri_3<RowMajor,Upper,Real,DType>(Diag, A, lda);
+        } else if (N == 2) {
+          if (Diag == CblasNonUnit) {
+            A->inverse();
+            (A+(lda+1))->inverse();
+            if (Upper) {
+              *(A+1)     *=   *A;         // TRI_MUL
+              *(A+1)     *=   *(A+lda+1); // TRI_MUL
+            } else {
+              *(A+lda)   *=   *A;         // TRI_MUL
+              *(A+lda)   *=   *(A+lda+1); // TRI_MUL
+            }
+          }
+          if (Upper) *(A+1)   = -*(A+1);      // TRI_NEG
+          else       *(A+lda) = -*(A+lda);    // TRI_NEG
+        } else if (Diag == CblasNonUnit) A->inverse();
+      } else { // not real
+        if (Diag == CblasNonUnit) A->inverse();
+      }
+    }
+  } else {
+    rb_raise(rb_eNotImpError, "only row-major implemented at this time");
+  }
+  return ierr;
+}
+template <bool RowMajor, bool Real, typename DType>
+int getri(const int N, DType* A, const int lda, const int* ipiv, DType* wrk, const int lwrk) {
+  if (!RowMajor) rb_raise(rb_eNotImpError, "only row-major implemented at this time");
+  int jb, nb, I, ndown, iret;
+  const DType ONE = 1, NONE = -1;
+  int iret = trtri<RowMajor,false,Real,DType>(CblasNonUnit, N, A, lda);
+  if (!iret && N > 1) {
+    jb = lwrk / N;
+    if (jb >= NB) nb = ATL_MulByNB(ATL_DivByNB(jb));
+    else if (jb >= ATL_mmMU) nb = (jb/ATL_mmMU)*ATL_mmMU;
+    else nb = jb;
+    if (!nb) return -6; // need at least 1 row of workspace
+    // only first iteration will have partial block, unroll it
+    jb = N - (N/nb) * nb;
+    if (!jb) jb = nb;
+    I = N - jb;
+    A += lda * I;
+    trcpzeroU<DType>(jb, jb, A+I, lda, wrk, jb);
+    nm::math::trsm<DType>(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasUnit,
+                          jb, N, ONE, wrk, jb, A, lda);
+    if (I) {
+      do {
+        I -= nb;
+        A -= nb * lda;
+        ndown = N-I;
+        trcpzeroU<DType>(nb, ndown, A+I, lda, wrk, ndown);
+        nm::math::gemm<DType>(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasUnit,
+                              nb, N, ONE, wrk, ndown, A, lda);
+      } while (I);
+    }
+    // Apply row interchanges
+    for (I = N - 2; I >= 0; --I) {
+      jb = ipiv[I];
+      if (jb != I) nm::math::swap<DType>(N, A+I*lda, 1, A+jb*lda, 1);
+    }
+  }
+  return iret;
+}
+*/
+/*
+ * Macro for declaring LAPACK specializations of the getrf function.
+ *
+ * type is the DType; call is the specific function to call; cast_as is what the DType* should be
+ * cast to in order to pass it to LAPACK.
+ *
+ * The generated specialization returns 0 when call reports success. Any other info value raises
+ * ArgumentError. Following the LAPACK xGETRF convention, a negative info means argument number -info
+ * was invalid, and a positive info means the factorization produced an exactly-zero pivot (U is
+ * singular); the two cases get separate messages. rb_raise is expected not to return (Ruby C API), so
+ * the trailing return is only there for the compiler.
+ */
+#define LAPACK_GETRF(type, call, cast_as)                                     \
+template <>                                                                   \
+inline int getrf(const enum CBLAS_ORDER Order, const int M, const int N, type * A, const int lda, int* ipiv) { \
+  int info = call(Order, M, N, reinterpret_cast<cast_as *>(A), lda, ipiv);    \
+  if (!info) return info;                                                     \
+  if (info < 0) {                                                             \
+    rb_raise(rb_eArgError, "getrf: problem with argument %d\n", -info);       \
+  } else {                                                                    \
+    rb_raise(rb_eArgError, "getrf: factor U is exactly singular (info = %d)\n", info); \
+  }                                                                           \
+  return info;                                                                \
+}
+/* Specialize for ATLAS types */
+/*LAPACK_GETRF(float,      clapack_sgetrf, float)
+LAPACK_GETRF(double,     clapack_dgetrf, double)
+LAPACK_GETRF(Complex64,  clapack_cgetrf, void)
+LAPACK_GETRF(Complex128, clapack_zgetrf, void)
+*/
+}} // end namespace nm::math
+#endif // MATH_H