nmatrix 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/complex.h +183 -159
- data/ext/nmatrix/data/data.cpp +113 -112
- data/ext/nmatrix/data/data.h +306 -292
- data/ext/nmatrix/data/ruby_object.h +193 -193
- data/ext/nmatrix/extconf.rb +11 -9
- data/ext/nmatrix/math.cpp +9 -11
- data/ext/nmatrix/math/math.h +3 -2
- data/ext/nmatrix/math/trsm.h +152 -152
- data/ext/nmatrix/nmatrix.h +30 -0
- data/ext/nmatrix/ruby_constants.cpp +67 -67
- data/ext/nmatrix/ruby_constants.h +35 -35
- data/ext/nmatrix/ruby_nmatrix.c +168 -183
- data/ext/nmatrix/storage/common.h +4 -3
- data/ext/nmatrix/storage/dense/dense.cpp +50 -50
- data/ext/nmatrix/storage/dense/dense.h +8 -7
- data/ext/nmatrix/storage/list/list.cpp +16 -16
- data/ext/nmatrix/storage/list/list.h +7 -6
- data/ext/nmatrix/storage/storage.cpp +32 -32
- data/ext/nmatrix/storage/storage.h +12 -11
- data/ext/nmatrix/storage/yale/class.h +2 -2
- data/ext/nmatrix/storage/yale/iterators/base.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/iterator.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +2 -1
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +1 -0
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +2 -1
- data/ext/nmatrix/storage/yale/yale.cpp +27 -27
- data/ext/nmatrix/storage/yale/yale.h +7 -6
- data/ext/nmatrix/ttable_helper.rb +10 -10
- data/ext/nmatrix/types.h +3 -2
- data/ext/nmatrix/util/io.cpp +7 -7
- data/ext/nmatrix/util/sl_list.cpp +26 -26
- data/ext/nmatrix/util/sl_list.h +19 -18
- data/lib/nmatrix/blas.rb +7 -7
- data/lib/nmatrix/io/mat5_reader.rb +30 -30
- data/lib/nmatrix/math.rb +73 -17
- data/lib/nmatrix/nmatrix.rb +10 -8
- data/lib/nmatrix/shortcuts.rb +3 -3
- data/lib/nmatrix/version.rb +3 -3
- data/spec/00_nmatrix_spec.rb +6 -0
- data/spec/math_spec.rb +77 -0
- data/spec/spec_helper.rb +9 -0
- metadata +2 -2
    
        data/ext/nmatrix/extconf.rb
    CHANGED
    
    | @@ -105,9 +105,6 @@ basenames = %w{nmatrix ruby_constants data/data util/io math util/sl_list storag | |
| 105 105 | 
             
            $objs = basenames.map { |b| "#{b}.o"   }
         | 
| 106 106 | 
             
            $srcs = basenames.map { |b| "#{b}.cpp" }
         | 
| 107 107 |  | 
| 108 | 
            -
            #CONFIG['CXX'] = 'clang++'
         | 
| 109 | 
            -
            CONFIG['CXX'] = 'g++'
         | 
| 110 | 
            -
             | 
| 111 108 | 
             
            def find_newer_gplusplus #:nodoc:
         | 
| 112 109 | 
             
              print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
         | 
| 113 110 | 
             
              [9,8,7,6,5,4,3].each do |minor|
         | 
| @@ -135,7 +132,7 @@ end | |
| 135 132 |  | 
| 136 133 |  | 
| 137 134 | 
             
            if CONFIG['CXX'] == 'clang++'
         | 
| 138 | 
            -
              $ | 
| 135 | 
            +
              $CXX_STANDARD = 'c++11'
         | 
| 139 136 |  | 
| 140 137 | 
             
            else
         | 
| 141 138 | 
             
              version = gplusplus_version
         | 
| @@ -147,11 +144,11 @@ else | |
| 147 144 | 
             
              end
         | 
| 148 145 |  | 
| 149 146 | 
             
              if version < '4.7.0'
         | 
| 150 | 
            -
                $ | 
| 147 | 
            +
                $CXX_STANDARD = 'c++0x'
         | 
| 151 148 | 
             
              else
         | 
| 152 | 
            -
                $ | 
| 149 | 
            +
                $CXX_STANDARD = 'c++11'
         | 
| 153 150 | 
             
              end
         | 
| 154 | 
            -
              puts "using C++ standard... #{$ | 
| 151 | 
            +
              puts "using C++ standard... #{$CXX_STANDARD}"
         | 
| 155 152 | 
             
              puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
         | 
| 156 153 | 
             
            end
         | 
| 157 154 |  | 
| @@ -160,13 +157,18 @@ end | |
| 160 157 | 
             
            # For release, these next two should both be changed to -O3.
         | 
| 161 158 | 
             
            $CFLAGS += " -O3 "
         | 
| 162 159 | 
             
            #$CFLAGS += " -static -O0 -g "
         | 
| 163 | 
            -
            $ | 
| 164 | 
            -
            #$ | 
| 160 | 
            +
            $CXXFLAGS += " -O3 -std=#{$CXX_STANDARD} " #-fmax-errors=10 -save-temps
         | 
| 161 | 
            +
            #$CXXFLAGS += " -static -O0 -g -std=#{$CXX_STANDARD} "
         | 
| 165 162 |  | 
| 166 163 | 
             
            CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
         | 
| 167 164 | 
             
            CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '')
         | 
| 168 165 | 
             
            CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
         | 
| 169 166 |  | 
| 167 | 
            +
            have_func("rb_array_const_ptr", "ruby.h")
         | 
| 168 | 
            +
            have_macro("FIX_CONST_VALUE_PTR", "ruby.h")
         | 
| 169 | 
            +
            have_macro("RARRAY_CONST_PTR", "ruby.h")
         | 
| 170 | 
            +
            have_macro("RARRAY_AREF", "ruby.h")
         | 
| 171 | 
            +
             | 
| 170 172 | 
             
            create_conf_h("nmatrix_config.h")
         | 
| 171 173 | 
             
            create_makefile("nmatrix")
         | 
| 172 174 |  | 
    
        data/ext/nmatrix/math.cpp
    CHANGED
    
    | @@ -126,6 +126,7 @@ | |
| 126 126 | 
             
             */
         | 
| 127 127 |  | 
| 128 128 |  | 
| 129 | 
            +
            #include <ruby.h>
         | 
| 129 130 | 
             
            #include <algorithm>
         | 
| 130 131 | 
             
            #include <limits>
         | 
| 131 132 | 
             
            #include <cmath>
         | 
| @@ -232,8 +233,8 @@ namespace nm { | |
| 232 233 | 
             
                template <typename DType>
         | 
| 233 234 | 
             
                void inverse(const int M, void* a_elements) {
         | 
| 234 235 | 
             
                  DType* matrix   = reinterpret_cast<DType*>(a_elements);
         | 
| 235 | 
            -
                  int | 
| 236 | 
            -
                  int | 
| 236 | 
            +
                  int row_index[M]; // arrays for keeping track of column scrambling
         | 
| 237 | 
            +
                  int col_index[M];
         | 
| 237 238 |  | 
| 238 239 | 
             
                  for (int k = 0;k < M; ++k) {
         | 
| 239 240 | 
             
                    DType akk = std::abs( matrix[k * (M + 1)] ) ; // diagonal element
         | 
| @@ -294,9 +295,6 @@ namespace nm { | |
| 294 295 | 
             
                      }
         | 
| 295 296 | 
             
                    }
         | 
| 296 297 | 
             
                  }
         | 
| 297 | 
            -
             | 
| 298 | 
            -
                  delete[] row_index;
         | 
| 299 | 
            -
                  delete[] col_index;
         | 
| 300 298 | 
             
                }
         | 
| 301 299 |  | 
| 302 300 | 
             
                /*
         | 
| @@ -599,8 +597,8 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) { | |
| 599 597 | 
             
                  rb_ary_store(result, 0, *reinterpret_cast<VALUE*>(pC));
         | 
| 600 598 | 
             
                  rb_ary_store(result, 1, *reinterpret_cast<VALUE*>(pS));
         | 
| 601 599 | 
             
                } else {
         | 
| 602 | 
            -
                  rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
         | 
| 603 | 
            -
                  rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
         | 
| 600 | 
            +
                  rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
         | 
| 601 | 
            +
                  rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
         | 
| 604 602 | 
             
                }
         | 
| 605 603 | 
             
                NM_CONSERVATIVE(nm_unregister_value(&ab));
         | 
| 606 604 | 
             
                NM_CONSERVATIVE(nm_unregister_value(&self));
         | 
| @@ -724,7 +722,7 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) { | |
| 724 722 |  | 
| 725 723 | 
             
                ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
         | 
| 726 724 |  | 
| 727 | 
            -
                return rubyobj_from_cval(Result, rdtype).rval;
         | 
| 725 | 
            +
                return nm::rubyobj_from_cval(Result, rdtype).rval;
         | 
| 728 726 | 
             
              }
         | 
| 729 727 | 
             
            }
         | 
| 730 728 |  | 
| @@ -773,7 +771,7 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) { | |
| 773 771 |  | 
| 774 772 | 
             
              ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
         | 
| 775 773 |  | 
| 776 | 
            -
              return rubyobj_from_cval(Result, rdtype).rval;
         | 
| 774 | 
            +
              return nm::rubyobj_from_cval(Result, rdtype).rval;
         | 
| 777 775 | 
             
            }
         | 
| 778 776 |  | 
| 779 777 | 
             
            /*
         | 
| @@ -1005,7 +1003,7 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL | |
| 1005 1003 | 
             
              } else {
         | 
| 1006 1004 | 
             
                ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
         | 
| 1007 1005 | 
             
                for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
         | 
| 1008 | 
            -
                  ipiv_[index] = FIX2INT(  | 
| 1006 | 
            +
                  ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
         | 
| 1009 1007 | 
             
                }
         | 
| 1010 1008 | 
             
              }
         | 
| 1011 1009 |  | 
| @@ -1057,7 +1055,7 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, | |
| 1057 1055 | 
             
              } else {
         | 
| 1058 1056 | 
             
                ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
         | 
| 1059 1057 | 
             
                for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
         | 
| 1060 | 
            -
                  ipiv_[index] = FIX2INT(  | 
| 1058 | 
            +
                  ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
         | 
| 1061 1059 | 
             
                }
         | 
| 1062 1060 | 
             
              }
         | 
| 1063 1061 |  | 
    
        data/ext/nmatrix/math/math.h
    CHANGED
    
    | @@ -72,6 +72,7 @@ | |
| 72 72 |  | 
| 73 73 | 
             
            #include <algorithm> // std::min, std::max
         | 
| 74 74 | 
             
            #include <limits> // std::numeric_limits
         | 
| 75 | 
            +
            #include <memory> // std::unique_ptr
         | 
| 75 76 |  | 
| 76 77 | 
             
            /*
         | 
| 77 78 | 
             
             * Project Includes
         | 
| @@ -123,8 +124,8 @@ template <typename DType> | |
| 123 124 | 
             
            inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
         | 
| 124 125 | 
             
                        const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
         | 
| 125 126 | 
             
              const unsigned int max_lmn = std::max(std::max(m, n), l);
         | 
| 126 | 
            -
              IType next[max_lmn];
         | 
| 127 | 
            -
              DType sums[max_lmn];
         | 
| 127 | 
            +
              std::unique_ptr<IType[]> next(new IType[max_lmn]);
         | 
| 128 | 
            +
              std::unique_ptr<DType[]> sums(new DType[max_lmn]);
         | 
| 128 129 |  | 
| 129 130 | 
             
              DType v;
         | 
| 130 131 |  | 
    
        data/ext/nmatrix/math/trsm.h
    CHANGED
    
    | @@ -88,183 +88,183 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, | |
| 88 88 | 
             
                    b[i + j * ldb] = 0;
         | 
| 89 89 | 
             
                  }
         | 
| 90 90 | 
             
                }
         | 
| 91 | 
            -
             | 
| 91 | 
            +
                return;
         | 
| 92 92 | 
             
              }
         | 
| 93 93 |  | 
| 94 94 | 
             
              if (side == CblasLeft) {
         | 
| 95 | 
            -
             | 
| 95 | 
            +
                if (trans_a == CblasNoTrans) {
         | 
| 96 96 |  | 
| 97 97 | 
             
                  /* Form  B := alpha*inv( A )*B. */
         | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 103 | 
            -
             | 
| 104 | 
            -
             | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
             | 
| 108 | 
            -
             | 
| 109 | 
            -
             | 
| 98 | 
            +
                  if (uplo == CblasUpper) {
         | 
| 99 | 
            +
                    for (int j = 0; j < n; ++j) {
         | 
| 100 | 
            +
                      if (alpha != 1) {
         | 
| 101 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 102 | 
            +
                          b[i + j * ldb] = alpha * b[i + j * ldb];
         | 
| 103 | 
            +
                        }
         | 
| 104 | 
            +
                      }
         | 
| 105 | 
            +
                      for (int k = m-1; k >= 0; --k) {
         | 
| 106 | 
            +
                        if (b[k + j * ldb] != 0) {
         | 
| 107 | 
            +
                          if (diag == CblasNonUnit) {
         | 
| 108 | 
            +
                            b[k + j * ldb] /= a[k + k * lda];
         | 
| 109 | 
            +
                          }
         | 
| 110 110 |  | 
| 111 111 | 
             
                          for (int i = 0; i < k-1; ++i) {
         | 
| 112 112 | 
             
                            b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
         | 
| 113 113 | 
             
                          }
         | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 116 | 
            -
             | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 114 | 
            +
                        }
         | 
| 115 | 
            +
                      }
         | 
| 116 | 
            +
                    }
         | 
| 117 | 
            +
                  } else {
         | 
| 118 | 
            +
                    for (int j = 0; j < n; ++j) {
         | 
| 119 | 
            +
                      if (alpha != 1) {
         | 
| 120 120 | 
             
                        for (int i = 0; i < m; ++i) {
         | 
| 121 121 | 
             
                          b[i + j * ldb] = alpha * b[i + j * ldb];
         | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 122 | 
            +
                        }
         | 
| 123 | 
            +
                      }
         | 
| 124 | 
            +
                      for (int k = 0; k < m; ++k) {
         | 
| 125 | 
            +
                        if (b[k + j * ldb] != 0.) {
         | 
| 126 | 
            +
                          if (diag == CblasNonUnit) {
         | 
| 127 | 
            +
                            b[k + j * ldb] /= a[k + k * lda];
         | 
| 128 | 
            +
                          }
         | 
| 129 | 
            +
                          for (int i = k+1; i < m; ++i) {
         | 
| 130 | 
            +
                            b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
         | 
| 131 | 
            +
                          }
         | 
| 132 | 
            +
                        }
         | 
| 133 | 
            +
                      }
         | 
| 134 | 
            +
                    }
         | 
| 135 | 
            +
                  }
         | 
| 136 | 
            +
                } else { // CblasTrans
         | 
| 137 137 |  | 
| 138 138 | 
             
                  /*           Form  B := alpha*inv( A**T )*B. */
         | 
| 139 | 
            -
             | 
| 140 | 
            -
             | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 139 | 
            +
                  if (uplo == CblasUpper) {
         | 
| 140 | 
            +
                    for (int j = 0; j < n; ++j) {
         | 
| 141 | 
            +
                      for (int i = 0; i < m; ++i) {
         | 
| 142 | 
            +
                        DType temp = alpha * b[i + j * ldb];
         | 
| 143 143 | 
             
                        for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
         | 
| 144 144 | 
             
                          temp -= a[k + i * lda] * b[k + j * ldb];
         | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 145 | 
            +
                        }
         | 
| 146 | 
            +
                        if (diag == CblasNonUnit) {
         | 
| 147 | 
            +
                          temp /= a[i + i * lda];
         | 
| 148 | 
            +
                        }
         | 
| 149 | 
            +
                        b[i + j * ldb] = temp;
         | 
| 150 | 
            +
                      }
         | 
| 151 | 
            +
                    }
         | 
| 152 | 
            +
                  } else {
         | 
| 153 | 
            +
                    for (int j = 0; j < n; ++j) {
         | 
| 154 | 
            +
                      for (int i = m-1; i >= 0; --i) {
         | 
| 155 | 
            +
                        DType temp= alpha * b[i + j * ldb];
         | 
| 156 | 
            +
                        for (int k = i+1; k < m; ++k) {
         | 
| 157 | 
            +
                          temp -= a[k + i * lda] * b[k + j * ldb];
         | 
| 158 | 
            +
                        }
         | 
| 159 | 
            +
                        if (diag == CblasNonUnit) {
         | 
| 160 | 
            +
                          temp /= a[i + i * lda];
         | 
| 161 | 
            +
                        }
         | 
| 162 | 
            +
                        b[i + j * ldb] = temp;
         | 
| 163 | 
            +
                      }
         | 
| 164 | 
            +
                    }
         | 
| 165 | 
            +
                  }
         | 
| 166 | 
            +
                }
         | 
| 167 167 | 
             
              } else { // right side
         | 
| 168 168 |  | 
| 169 | 
            -
             | 
| 169 | 
            +
                if (trans_a == CblasNoTrans) {
         | 
| 170 170 |  | 
| 171 171 | 
             
                  /*           Form  B := alpha*B*inv( A ). */
         | 
| 172 172 |  | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 175 | 
            -
             | 
| 176 | 
            -
             | 
| 177 | 
            -
             | 
| 178 | 
            -
             | 
| 179 | 
            -
             | 
| 180 | 
            -
             | 
| 181 | 
            -
             | 
| 182 | 
            -
             | 
| 183 | 
            -
             | 
| 184 | 
            -
             | 
| 185 | 
            -
             | 
| 186 | 
            -
             | 
| 187 | 
            -
             | 
| 188 | 
            -
             | 
| 189 | 
            -
             | 
| 190 | 
            -
             | 
| 191 | 
            -
             | 
| 192 | 
            -
             | 
| 193 | 
            -
             | 
| 194 | 
            -
             | 
| 195 | 
            -
             | 
| 196 | 
            -
             | 
| 197 | 
            -
             | 
| 198 | 
            -
             | 
| 199 | 
            -
             | 
| 200 | 
            -
             | 
| 173 | 
            +
                  if (uplo == CblasUpper) {
         | 
| 174 | 
            +
                    for (int j = 0; j < n; ++j) {
         | 
| 175 | 
            +
                      if (alpha != 1) {
         | 
| 176 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 177 | 
            +
                          b[i + j * ldb] = alpha * b[i + j * ldb];
         | 
| 178 | 
            +
                        }
         | 
| 179 | 
            +
                      }
         | 
| 180 | 
            +
                      for (int k = 0; k < j-1; ++k) {
         | 
| 181 | 
            +
                        if (a[k + j * lda] != 0) {
         | 
| 182 | 
            +
                          for (int i = 0; i < m; ++i) {
         | 
| 183 | 
            +
                            b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
         | 
| 184 | 
            +
                          }
         | 
| 185 | 
            +
                        }
         | 
| 186 | 
            +
                      }
         | 
| 187 | 
            +
                      if (diag == CblasNonUnit) {
         | 
| 188 | 
            +
                        DType temp = 1 / a[j + j * lda];
         | 
| 189 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 190 | 
            +
                          b[i + j * ldb] = temp * b[i + j * ldb];
         | 
| 191 | 
            +
                        }
         | 
| 192 | 
            +
                      }
         | 
| 193 | 
            +
                    }
         | 
| 194 | 
            +
                  } else {
         | 
| 195 | 
            +
                    for (int j = n-1; j >= 0; --j) {
         | 
| 196 | 
            +
                      if (alpha != 1) {
         | 
| 197 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 198 | 
            +
                          b[i + j * ldb] = alpha * b[i + j * ldb];
         | 
| 199 | 
            +
                        }
         | 
| 200 | 
            +
                      }
         | 
| 201 201 |  | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 206 | 
            -
             | 
| 207 | 
            -
             | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
             | 
| 202 | 
            +
                      for (int k = j+1; k < n; ++k) {
         | 
| 203 | 
            +
                        if (a[k + j * lda] != 0.) {
         | 
| 204 | 
            +
                          for (int i = 0; i < m; ++i) {
         | 
| 205 | 
            +
                            b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
         | 
| 206 | 
            +
                          }
         | 
| 207 | 
            +
                        }
         | 
| 208 | 
            +
                      }
         | 
| 209 | 
            +
                      if (diag == CblasNonUnit) {
         | 
| 210 | 
            +
                        DType temp = 1 / a[j + j * lda];
         | 
| 211 211 |  | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
| 218 | 
            -
             | 
| 212 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 213 | 
            +
                          b[i + j * ldb] = temp * b[i + j * ldb];
         | 
| 214 | 
            +
                        }
         | 
| 215 | 
            +
                      }
         | 
| 216 | 
            +
                    }
         | 
| 217 | 
            +
                  }
         | 
| 218 | 
            +
                } else { // CblasTrans
         | 
| 219 219 |  | 
| 220 220 | 
             
                  /*           Form  B := alpha*B*inv( A**T ). */
         | 
| 221 221 |  | 
| 222 | 
            -
             | 
| 223 | 
            -
             | 
| 224 | 
            -
             | 
| 225 | 
            -
             | 
| 226 | 
            -
             | 
| 227 | 
            -
             | 
| 228 | 
            -
             | 
| 229 | 
            -
             | 
| 230 | 
            -
             | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
             | 
| 234 | 
            -
             | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 239 | 
            -
             | 
| 240 | 
            -
             | 
| 241 | 
            -
             | 
| 242 | 
            -
             | 
| 243 | 
            -
             | 
| 244 | 
            -
             | 
| 245 | 
            -
             | 
| 246 | 
            -
             | 
| 247 | 
            -
             | 
| 248 | 
            -
             | 
| 249 | 
            -
             | 
| 250 | 
            -
             | 
| 251 | 
            -
             | 
| 252 | 
            -
             | 
| 253 | 
            -
             | 
| 254 | 
            -
             | 
| 255 | 
            -
             | 
| 256 | 
            -
             | 
| 257 | 
            -
             | 
| 258 | 
            -
             | 
| 259 | 
            -
             | 
| 260 | 
            -
             | 
| 261 | 
            -
             | 
| 262 | 
            -
             | 
| 263 | 
            -
             | 
| 264 | 
            -
             | 
| 265 | 
            -
             | 
| 266 | 
            -
             | 
| 267 | 
            -
             | 
| 222 | 
            +
                  if (uplo == CblasUpper) {
         | 
| 223 | 
            +
                    for (int k = n-1; k >= 0; --k) {
         | 
| 224 | 
            +
                      if (diag == CblasNonUnit) {
         | 
| 225 | 
            +
                        DType temp= 1 / a[k + k * lda];
         | 
| 226 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 227 | 
            +
                          b[i + k * ldb] = temp * b[i + k * ldb];
         | 
| 228 | 
            +
                        }
         | 
| 229 | 
            +
                      }
         | 
| 230 | 
            +
                      for (int j = 0; j < k-1; ++j) {
         | 
| 231 | 
            +
                        if (a[j + k * lda] != 0.) {
         | 
| 232 | 
            +
                          DType temp= a[j + k * lda];
         | 
| 233 | 
            +
                          for (int i = 0; i < m; ++i) {
         | 
| 234 | 
            +
                            b[i + j * ldb] -= temp * b[i + k *  ldb];
         | 
| 235 | 
            +
                          }
         | 
| 236 | 
            +
                        }
         | 
| 237 | 
            +
                      }
         | 
| 238 | 
            +
                      if (alpha != 1) {
         | 
| 239 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 240 | 
            +
                          b[i + k * ldb] = alpha * b[i + k * ldb];
         | 
| 241 | 
            +
                        }
         | 
| 242 | 
            +
                      }
         | 
| 243 | 
            +
                    }
         | 
| 244 | 
            +
                  } else {
         | 
| 245 | 
            +
                    for (int k = 0; k < n; ++k) {
         | 
| 246 | 
            +
                      if (diag == CblasNonUnit) {
         | 
| 247 | 
            +
                        DType temp = 1 / a[k + k * lda];
         | 
| 248 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 249 | 
            +
                          b[i + k * ldb] = temp * b[i + k * ldb];
         | 
| 250 | 
            +
                        }
         | 
| 251 | 
            +
                      }
         | 
| 252 | 
            +
                      for (int j = k+1; j < n; ++j) {
         | 
| 253 | 
            +
                        if (a[j + k * lda] != 0.) {
         | 
| 254 | 
            +
                          DType temp = a[j + k * lda];
         | 
| 255 | 
            +
                          for (int i = 0; i < m; ++i) {
         | 
| 256 | 
            +
                            b[i + j * ldb] -= temp * b[i + k * ldb];
         | 
| 257 | 
            +
                          }
         | 
| 258 | 
            +
                        }
         | 
| 259 | 
            +
                      }
         | 
| 260 | 
            +
                      if (alpha != 1) {
         | 
| 261 | 
            +
                        for (int i = 0; i < m; ++i) {
         | 
| 262 | 
            +
                          b[i + k * ldb] = alpha * b[i + k * ldb];
         | 
| 263 | 
            +
                        }
         | 
| 264 | 
            +
                      }
         | 
| 265 | 
            +
                    }
         | 
| 266 | 
            +
                  }
         | 
| 267 | 
            +
                }
         | 
| 268 268 | 
             
              }
         | 
| 269 269 | 
             
            }
         | 
| 270 270 |  |