RubyGems - nmatrix-gemv - Versions diffs - 0.0.3 - Mend

nmatrix-gemv 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

checksums.yaml +7 -0
data/.gitignore +29 -0
data/.rspec +2 -0
data/.travis.yml +14 -0
data/Gemfile +7 -0
data/README.md +29 -0
data/Rakefile +225 -0
data/ext/nmatrix_gemv/binary_format.txt +53 -0
data/ext/nmatrix_gemv/data/complex.h +399 -0
data/ext/nmatrix_gemv/data/data.cpp +298 -0
data/ext/nmatrix_gemv/data/data.h +771 -0
data/ext/nmatrix_gemv/data/meta.h +70 -0
data/ext/nmatrix_gemv/data/rational.h +436 -0
data/ext/nmatrix_gemv/data/ruby_object.h +471 -0
data/ext/nmatrix_gemv/extconf.rb +254 -0
data/ext/nmatrix_gemv/math.cpp +1639 -0
data/ext/nmatrix_gemv/math/asum.h +143 -0
data/ext/nmatrix_gemv/math/geev.h +82 -0
data/ext/nmatrix_gemv/math/gemm.h +271 -0
data/ext/nmatrix_gemv/math/gemv.h +212 -0
data/ext/nmatrix_gemv/math/ger.h +96 -0
data/ext/nmatrix_gemv/math/gesdd.h +80 -0
data/ext/nmatrix_gemv/math/gesvd.h +78 -0
data/ext/nmatrix_gemv/math/getf2.h +86 -0
data/ext/nmatrix_gemv/math/getrf.h +240 -0
data/ext/nmatrix_gemv/math/getri.h +108 -0
data/ext/nmatrix_gemv/math/getrs.h +129 -0
data/ext/nmatrix_gemv/math/idamax.h +86 -0
data/ext/nmatrix_gemv/math/inc.h +47 -0
data/ext/nmatrix_gemv/math/laswp.h +165 -0
data/ext/nmatrix_gemv/math/long_dtype.h +52 -0
data/ext/nmatrix_gemv/math/math.h +1069 -0
data/ext/nmatrix_gemv/math/nrm2.h +181 -0
data/ext/nmatrix_gemv/math/potrs.h +129 -0
data/ext/nmatrix_gemv/math/rot.h +141 -0
data/ext/nmatrix_gemv/math/rotg.h +115 -0
data/ext/nmatrix_gemv/math/scal.h +73 -0
data/ext/nmatrix_gemv/math/swap.h +73 -0
data/ext/nmatrix_gemv/math/trsm.h +387 -0
data/ext/nmatrix_gemv/nm_memory.h +60 -0
data/ext/nmatrix_gemv/nmatrix_gemv.cpp +90 -0
data/ext/nmatrix_gemv/nmatrix_gemv.h +374 -0
data/ext/nmatrix_gemv/ruby_constants.cpp +153 -0
data/ext/nmatrix_gemv/ruby_constants.h +107 -0
data/ext/nmatrix_gemv/ruby_nmatrix.c +84 -0
data/ext/nmatrix_gemv/ttable_helper.rb +122 -0
data/ext/nmatrix_gemv/types.h +54 -0
data/ext/nmatrix_gemv/util/util.h +78 -0
data/lib/nmatrix-gemv.rb +43 -0
data/lib/nmatrix_gemv/blas.rb +85 -0
data/lib/nmatrix_gemv/nmatrix_gemv.rb +35 -0
data/lib/nmatrix_gemv/rspec.rb +75 -0
data/nmatrix-gemv.gemspec +31 -0
data/spec/blas_spec.rb +154 -0
data/spec/spec_helper.rb +128 -0
metadata +186 -0

data/ext/nmatrix_gemv/math/rotg.h ADDED

@@ -0,0 +1,115 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == rotg.h
+//
+// BLAS rotg function in native C++.
+//
+/*
+ *             Automatically Tuned Linear Algebra Software v3.8.4
+ *                    (C) Copyright 1999 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions, and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *   3. The name of the ATLAS group or the names of its contributers may
+ *      not be used to endorse or promote products derived from this
+ *      software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef ROTG_H
+# define ROTG_H
+namespace nm { namespace math {
+/*
+ * Givens plane rotation setup (BLAS xROTG), generic version. From ATLAS 3.8.4.
+ *
+ * On entry *a and *b hold the two values that define the rotation. On exit:
+ *   *a  = r, the scaled norm of (a, b), carrying the sign of whichever input
+ *         has the larger magnitude (b on ties);
+ *   *b  = z, the single-value encoding of the rotation:
+ *           z = s    when |a| >  |b|,
+ *           z = 1/c  when |a| <= |b| and c != 0,
+ *           z = 1    otherwise;
+ *   *c, *s = cosine and sine of the rotation.
+ * When both inputs are zero, c = 1 and s = r = z = 0.
+ */
+// FIXME: Not working properly for Ruby objects.
+template <typename DType>
+inline void rotg(DType* a, DType* b, DType* c, DType* s) {
+  DType aa    = std::abs(*a), ab = std::abs(*b);
+  DType roe   = aa > ab ? *a : *b;
+  DType scal  = aa + ab;
+  if (scal == 0) {
+    *c =  1;
+    *s = *a = *b = 0;
+  } else {
+    // Divide by scal before squaring so the intermediate sum cannot overflow.
+    DType t0  = aa / scal, t1 = ab / scal;
+    DType r   = scal * std::sqrt(t0 * t0 + t1 * t1);
+    if (roe < 0) r = -r;
+    *c = *a / r;
+    *s = *b / r;
+    // Choose z by relative magnitude, as reference xROTG does. The previous
+    // code always returned 1/c, which is wrong whenever |a| > |b|.
+    DType z   = DType(1);
+    if (aa > ab)      z = *s;
+    else if (*c != 0) z = 1 / *c;
+    *a = r;
+    *b = z;
+  }
+}
+// float specialization: forwards all four pointers unchanged to cblas_srotg.
+template <>
+inline void rotg(float* a, float* b, float* c, float* s) {
+  cblas_srotg(a, b, c, s);
+}
+// double specialization: forwards all four pointers unchanged to cblas_drotg.
+template <>
+inline void rotg(double* a, double* b, double* c, double* s) {
+  cblas_drotg(a, b, c, s);
+}
+// Complex64 specialization: forwards to cblas_crotg, passing each argument as void*.
+// NOTE(review): c is handed over as a Complex64*; confirm this matches what the linked
+// CBLAS expects for the cosine output.
+template <>
+inline void rotg(Complex64* a, Complex64* b, Complex64* c, Complex64* s) {
+  cblas_crotg(reinterpret_cast<void*>(a), reinterpret_cast<void*>(b), reinterpret_cast<void*>(c), reinterpret_cast<void*>(s));
+}
+// Complex128 specialization: forwards to cblas_zrotg, passing each argument as void*.
+// NOTE(review): c is handed over as a Complex128*; confirm this matches what the linked
+// CBLAS expects for the cosine output.
+template <>
+inline void rotg(Complex128* a, Complex128* b, Complex128* c, Complex128* s) {
+  cblas_zrotg(reinterpret_cast<void*>(a), reinterpret_cast<void*>(b), reinterpret_cast<void*>(c), reinterpret_cast<void*>(s));
+}
+// Type-erased entry point: reinterprets the four void* arguments as DType* and
+// forwards them to rotg<DType>.
+template <typename DType>
+inline void cblas_rotg(void* a, void* b, void* c, void* s) {
+  rotg<DType>(reinterpret_cast<DType*>(a), reinterpret_cast<DType*>(b), reinterpret_cast<DType*>(c), reinterpret_cast<DType*>(s));
+}
+} } //nm::math
+#endif // ROTG_H

data/ext/nmatrix_gemv/math/scal.h ADDED

@@ -0,0 +1,73 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == scal.h
+//
+// LAPACK scal function in native C.
+//
+#ifndef SCAL_H
+#define SCAL_H
+namespace nm { namespace math {
+/*  Purpose */
+/*  ======= */
+/*     DSCAL scales a vector by a constant. */
+/*     uses unrolled loops for increment equal to one. */
+/*  Further Details */
+/*  =============== */
+/*     jack dongarra, linpack, 3/11/78. */
+/*     modified 3/93 to return if incx .le. 0. */
+/*     modified 12/3/93, array(1) declarations changed to array(*) */
+/*  ===================================================================== */
+/*
+ * Scales a strided vector in place: dx[i*incx] = da * dx[i*incx] for i in [0, n).
+ *
+ *   n    - number of elements to scale; nothing is done when n <= 0.
+ *   da   - the scale factor.
+ *   dx   - vector data, overwritten with the scaled values.
+ *   incx - stride between consecutive elements; nothing is done when incx <= 0.
+ */
+template <typename DType>
+inline void scal(const int n, const DType da, DType* dx, const int incx) {
+  // This used to have unrolled loops, like dswap. They were in the way.
+  if (n <= 0 || incx <= 0) return;
+  // incx is strictly positive past the guard above, so a single forward walk
+  // suffices; the old "incx < 0" loop condition could never be taken.
+  const int end = n * incx;
+  for (int i = 0; i < end; i += incx) {
+    dx[i] = da * dx[i];
+  }
+} /* scal */
+/*
+ * Function signature conversion for LAPACK's scal function.
+ */
+// Reads the scale factor through da as a DType, reinterprets dx as DType*, and
+// forwards to scal<DType> with n and incx unchanged.
+template <typename DType>
+inline void clapack_scal(const int n, const void* da, void* dx, const int incx) {
+  // FIXME: See if we can call the clapack version instead of our C++ version.
+  scal<DType>(n, *reinterpret_cast<const DType*>(da), reinterpret_cast<DType*>(dx), incx);
+}
+}} // end of nm::math
+#endif

data/ext/nmatrix_gemv/math/swap.h ADDED

@@ -0,0 +1,73 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == swap.h
+//
+// BLAS level 2 swap function in native C++.
+//
+#ifndef SWAP_H
+#define SWAP_H
+namespace nm { namespace math {
+/*
+template <typename DType>
+inline void swap(int n, DType *dx, int incx, DType *dy, int incy) {
+  if (n <= 0) return;
+  // For negative increments, start at the end of the array.
+  int ix = incx < 0 ? (-n+1)*incx : 0,
+      iy = incy < 0 ? (-n+1)*incy : 0;
+  if (incx < 0) ix = (-n + 1) * incx;
+  if (incy < 0) iy = (-n + 1) * incy;
+  for (size_t i = 0; i < n; ++i, ix += incx, iy += incy) {
+    DType dtemp = dx[ix];
+    dx[ix]      = dy[iy];
+    dy[iy]      = dtemp;
+  }
+  return;
+} /* dswap */
+// This is the old BLAS version of this function. ATLAS has an optimized version, but
+// it's going to be tough to translate.
+/*
+ * Exchanges N strided elements between X and Y (BLAS xSWAP).
+ *
+ *   N          - number of elements to exchange; nothing is done when N <= 0.
+ *   X, incX    - first vector and the stride between its elements.
+ *   Y, incY    - second vector and the stride between its elements.
+ *
+ * A negative stride walks that vector backwards, starting from offset
+ * (1 - N) * inc, which is the reference BLAS convention.
+ */
+template <typename DType>
+static void swap(const int N, DType* X, const int incX, DType* Y, const int incY) {
+  if (N <= 0) return;
+  // The previous version advanced ix/iy but indexed with the loop counter, so
+  // the strides were silently ignored. Index with the strided offsets instead.
+  int ix = incX < 0 ? (1 - N) * incX : 0;
+  int iy = incY < 0 ? (1 - N) * incY : 0;
+  for (int i = 0; i < N; ++i, ix += incX, iy += incY) {
+    DType temp = X[ix];
+    X[ix]      = Y[iy];
+    Y[iy]      = temp;
+  }
+}
+}} // end nm::math
+#endif

data/ext/nmatrix_gemv/math/trsm.h ADDED

@@ -0,0 +1,387 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == trsm.h
+//
+// trsm function in native C++.
+//
+/*
+ *             Automatically Tuned Linear Algebra Software v3.8.4
+ *                    (C) Copyright 1999 R. Clint Whaley
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions, and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *   3. The name of the ATLAS group or the names of its contributers may
+ *      not be used to endorse or promote products derived from this
+ *      software without specific written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef TRSM_H
+#define TRSM_H
+extern "C" {
+#if defined HAVE_CBLAS_H
+  #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
+}
+namespace nm { namespace math {
+/*
+ * Triangular solve with multiple right-hand sides, in place on b. This version
+ * doesn't do any error checks and only works on column-major matrices
+ * (element (i, j) of b lives at b[i + j * ldb], and likewise for a with lda).
+ *
+ * For row major, call trsm<DType> instead. That will handle necessary
+ * changes-of-variables and parameter checks.
+ *
+ *   side    - CblasLeft solves op(A) * X = alpha * B; any other value takes the
+ *             right-side path, X * op(A) = alpha * B.
+ *   uplo    - CblasUpper reads the upper triangle of a; any other value reads the lower.
+ *   trans_a - CblasNoTrans uses A as is; any other value is treated as a plain
+ *             transpose (no conjugation is applied here).
+ *   diag    - CblasNonUnit divides by a's diagonal; otherwise the diagonal is taken as 1.
+ *   m, n    - rows and columns of b. Returns immediately if either is 0.
+ *   alpha   - scalar applied to b. When it is 0, b is zero-filled and a is not read.
+ *   a, lda  - the triangular matrix and its leading dimension.
+ *   b, ldb  - right-hand sides on entry, solution on exit, and its leading dimension.
+ *
+ * This was converted directly from dtrsm.f using f2c, and then rewritten more
+ * cleanly. Fortran loops of the form "DO I = 1, K-1" become "i < k" with
+ * 0-based indices; three of them had been left as "< k-1" and skipped the last
+ * off-diagonal element. Very little has been tested!
+ */
+template <typename DType>
+inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                         const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
+                         const int m, const int n, const DType alpha, const DType* a,
+                         const int lda, DType* b, const int ldb)
+{
+  // (row-major) trsm: left upper trans nonunit m=3 n=1 1/1 a 3 b 3
+  if (m == 0 || n == 0) return; /* Quick return if possible. */
+
+  if (alpha == 0) { // Handle alpha == 0
+    for (int j = 0; j < n; ++j) {
+      for (int i = 0; i < m; ++i) {
+        b[i + j * ldb] = 0;
+      }
+    }
+    return;
+  }
+
+  if (side == CblasLeft) {
+    if (trans_a == CblasNoTrans) {
+      /* Form  B := alpha*inv( A )*B. */
+      if (uplo == CblasUpper) {
+        // Back substitution, one column of b at a time.
+        for (int j = 0; j < n; ++j) {
+          if (alpha != 1) {
+            for (int i = 0; i < m; ++i) {
+              b[i + j * ldb] = alpha * b[i + j * ldb];
+            }
+          }
+          for (int k = m-1; k >= 0; --k) {
+            if (b[k + j * ldb] != 0) {
+              if (diag == CblasNonUnit) {
+                b[k + j * ldb] /= a[k + k * lda];
+              }
+              // Eliminate x_k from every row above it (rows 0 .. k-1).
+              for (int i = 0; i < k; ++i) {
+                b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
+              }
+            }
+          }
+        }
+      } else {
+        // Forward substitution, one column of b at a time.
+        for (int j = 0; j < n; ++j) {
+          if (alpha != 1) {
+            for (int i = 0; i < m; ++i) {
+              b[i + j * ldb] = alpha * b[i + j * ldb];
+            }
+          }
+          for (int k = 0; k < m; ++k) {
+            if (b[k + j * ldb] != 0.) {
+              if (diag == CblasNonUnit) {
+                b[k + j * ldb] /= a[k + k * lda];
+              }
+              for (int i = k+1; i < m; ++i) {
+                b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
+              }
+            }
+          }
+        }
+      }
+    } else { // CblasTrans
+      /*           Form  B := alpha*inv( A**T )*B. */
+      if (uplo == CblasUpper) {
+        for (int j = 0; j < n; ++j) {
+          for (int i = 0; i < m; ++i) {
+            DType temp = alpha * b[i + j * ldb];
+            // Rows 0 .. i-1 are already solved; subtract their contribution.
+            for (int k = 0; k < i; ++k) {
+              temp -= a[k + i * lda] * b[k + j * ldb];
+            }
+            if (diag == CblasNonUnit) {
+              temp /= a[i + i * lda];
+            }
+            b[i + j * ldb] = temp;
+          }
+        }
+      } else {
+        for (int j = 0; j < n; ++j) {
+          for (int i = m-1; i >= 0; --i) {
+            DType temp = alpha * b[i + j * ldb];
+            for (int k = i+1; k < m; ++k) {
+              temp -= a[k + i * lda] * b[k + j * ldb];
+            }
+            if (diag == CblasNonUnit) {
+              temp /= a[i + i * lda];
+            }
+            b[i + j * ldb] = temp;
+          }
+        }
+      }
+    }
+  } else { // right side
+    if (trans_a == CblasNoTrans) {
+      /*           Form  B := alpha*B*inv( A ). */
+      if (uplo == CblasUpper) {
+        for (int j = 0; j < n; ++j) {
+          if (alpha != 1) {
+            for (int i = 0; i < m; ++i) {
+              b[i + j * ldb] = alpha * b[i + j * ldb];
+            }
+          }
+          // Subtract the already-solved columns 0 .. j-1.
+          for (int k = 0; k < j; ++k) {
+            if (a[k + j * lda] != 0) {
+              for (int i = 0; i < m; ++i) {
+                b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
+              }
+            }
+          }
+          if (diag == CblasNonUnit) {
+            DType temp = 1 / a[j + j * lda];
+            for (int i = 0; i < m; ++i) {
+              b[i + j * ldb] = temp * b[i + j * ldb];
+            }
+          }
+        }
+      } else {
+        for (int j = n-1; j >= 0; --j) {
+          if (alpha != 1) {
+            for (int i = 0; i < m; ++i) {
+              b[i + j * ldb] = alpha * b[i + j * ldb];
+            }
+          }
+          for (int k = j+1; k < n; ++k) {
+            if (a[k + j * lda] != 0.) {
+              for (int i = 0; i < m; ++i) {
+                b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
+              }
+            }
+          }
+          if (diag == CblasNonUnit) {
+            DType temp = 1 / a[j + j * lda];
+            for (int i = 0; i < m; ++i) {
+              b[i + j * ldb] = temp * b[i + j * ldb];
+            }
+          }
+        }
+      }
+    } else { // CblasTrans
+      /*           Form  B := alpha*B*inv( A**T ). */
+      if (uplo == CblasUpper) {
+        for (int k = n-1; k >= 0; --k) {
+          if (diag == CblasNonUnit) {
+            DType temp = 1 / a[k + k * lda];
+            for (int i = 0; i < m; ++i) {
+              b[i + k * ldb] = temp * b[i + k * ldb];
+            }
+          }
+          // Propagate solved column k into every column to its left (0 .. k-1).
+          for (int j = 0; j < k; ++j) {
+            if (a[j + k * lda] != 0.) {
+              DType temp = a[j + k * lda];
+              for (int i = 0; i < m; ++i) {
+                b[i + j * ldb] -= temp * b[i + k * ldb];
+              }
+            }
+          }
+          if (alpha != 1) {
+            for (int i = 0; i < m; ++i) {
+              b[i + k * ldb] = alpha * b[i + k * ldb];
+            }
+          }
+        }
+      } else {
+        for (int k = 0; k < n; ++k) {
+          if (diag == CblasNonUnit) {
+            DType temp = 1 / a[k + k * lda];
+            for (int i = 0; i < m; ++i) {
+              b[i + k * ldb] = temp * b[i + k * ldb];
+            }
+          }
+          for (int j = k+1; j < n; ++j) {
+            if (a[j + k * lda] != 0.) {
+              DType temp = a[j + k * lda];
+              for (int i = 0; i < m; ++i) {
+                b[i + j * ldb] -= temp * b[i + k * ldb];
+              }
+            }
+          }
+          if (alpha != 1) {
+            for (int i = 0; i < m; ++i) {
+              b[i + k * ldb] = alpha * b[i + k * ldb];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+/*
+ * BLAS' DTRSM function, generalized.
+ *
+ * Validates the leading dimensions and then delegates to trsm_nothrow, which
+ * only understands column-major storage. Excluded from overload resolution for
+ * integral DType (see the enable_if in the template header).
+ *
+ *   order   - CblasRowMajor or column-major (any other value).
+ *   side, uplo, trans_a, diag, alpha, a, b - as for trsm_nothrow.
+ *   m, n    - rows and columns of b.
+ *   lda     - must be >= max(1, m) when side is CblasLeft, else >= max(1, n).
+ *   ldb     - must be >= max(1, n) for row-major, >= max(1, m) for column-major.
+ *
+ * On a bad leading dimension a diagnostic is printed to stderr and rb_raise is
+ * called with rb_eArgError.
+ */
+template <typename DType, typename = typename std::enable_if<!std::is_integral<DType>::value>::type>
+inline void trsm(const enum CBLAS_ORDER order,
+                 const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                 const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
+                 const int m, const int n, const DType alpha, const DType* a,
+                 const int lda, DType* b, const int ldb)
+{
+  // A is m-by-m when applied from the left, n-by-n when applied from the right.
+  const int num_rows_a = (side == CblasLeft) ? m : n;
+
+  if (lda < std::max(1,num_rows_a)) {
+    fprintf(stderr, "TRSM: num_rows_a = %d; got lda=%d\n", num_rows_a, lda);
+    rb_raise(rb_eArgError, "TRSM: Expected lda >= max(1, num_rows_a)");
+  }
+
+  if (order == CblasRowMajor) {
+    if (ldb < std::max(1,n)) {
+      // Report N here: it is the quantity ldb is checked against for row-major.
+      fprintf(stderr, "TRSM: N=%d; got ldb=%d\n", n, ldb);
+      rb_raise(rb_eArgError, "TRSM: Expected ldb >= max(1,N)");
+    }
+
+    // A row-major matrix is its own transpose in column-major terms, so solve
+    // the transposed problem: switch side and uplo, and exchange m with n.
+    enum CBLAS_SIDE side_ = side == CblasLeft  ? CblasRight : CblasLeft;
+    enum CBLAS_UPLO uplo_ = uplo == CblasUpper ? CblasLower : CblasUpper;
+
+    trsm_nothrow<DType>(side_, uplo_, trans_a, diag, n, m, alpha, a, lda, b, ldb);
+  } else { // CblasColMajor
+    if (ldb < std::max(1,m)) {
+      fprintf(stderr, "TRSM: M=%d; got ldb=%d\n", m, ldb);
+      rb_raise(rb_eArgError, "TRSM: Expected ldb >= max(1,M)");
+    }
+
+    trsm_nothrow<DType>(side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb);
+  }
+}
+// float specialization: hands every argument straight to cblas_strsm. None of the
+// lda/ldb checks from the generic trsm are performed here.
+template <>
+inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                 const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
+                 const int m, const int n, const float alpha, const float* a,
+                 const int lda, float* b, const int ldb)
+{
+  cblas_strsm(order, side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb);
+}
+// double specialization: hands every argument straight to cblas_dtrsm. None of the
+// lda/ldb checks from the generic trsm are performed here.
+template <>
+inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                 const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
+                 const int m, const int n, const double alpha, const double* a,
+                 const int lda, double* b, const int ldb)
+{
+/*  using std::cerr;
+  using std::endl;
+  cerr << "(row-major) dtrsm: " << (side == CblasLeft ? "left " : "right ")
+       << (uplo == CblasUpper ? "upper " : "lower ")
+       << (trans_a == CblasTrans ? "trans " : "notrans ")
+       << (diag == CblasNonUnit ? "nonunit " : "unit ")
+       << m << " " << n << " " << alpha << " a " << lda << " b " << ldb << endl;
+*/
+  cblas_dtrsm(order, side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb);
+}
+// Complex64 specialization: forwards to cblas_ctrsm. alpha is passed by address and
+// a/b are passed as untyped pointers; no lda/ldb checks are performed here.
+template <>
+inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                 const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
+                 const int m, const int n, const Complex64 alpha, const Complex64* a,
+                 const int lda, Complex64* b, const int ldb)
+{
+  cblas_ctrsm(order, side, uplo, trans_a, diag, m, n, (const void*)(&alpha), (const void*)(a), lda, (void*)(b), ldb);
+}
+// Complex128 specialization: forwards to cblas_ztrsm. alpha is passed by address and
+// a/b are passed as untyped pointers; no lda/ldb checks are performed here.
+template <>
+inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                 const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
+                 const int m, const int n, const Complex128 alpha, const Complex128* a,
+                 const int lda, Complex128* b, const int ldb)
+{
+  cblas_ztrsm(order, side, uplo, trans_a, diag, m, n, (const void*)(&alpha), (const void*)(a), lda, (void*)(b), ldb);
+}
+} }  // namespace nm::math
+#endif // TRSM_H