RubyGems - nmatrix-atlas - Versions diffs - 0.2.1 → 0.2.3 - Mend

nmatrix-atlas 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

checksums.yaml +4 -4
data/ext/nmatrix/data/data.h +7 -8
data/ext/nmatrix/data/ruby_object.h +1 -4
data/ext/nmatrix/math/asum.h +10 -31
data/ext/nmatrix/math/cblas_templates_core.h +10 -10
data/ext/nmatrix/math/getrf.h +2 -2
data/ext/nmatrix/math/imax.h +12 -9
data/ext/nmatrix/math/laswp.h +3 -3
data/ext/nmatrix/math/long_dtype.h +16 -3
data/ext/nmatrix/math/magnitude.h +54 -0
data/ext/nmatrix/math/nrm2.h +19 -14
data/ext/nmatrix/math/trsm.h +40 -36
data/ext/nmatrix/math/util.h +14 -0
data/ext/nmatrix/nmatrix.h +39 -1
data/ext/nmatrix/storage/common.h +9 -3
data/ext/nmatrix/storage/yale/class.h +1 -1
data/ext/nmatrix_atlas/extconf.rb +3 -131
data/ext/nmatrix_atlas/math_atlas.cpp +15 -15
data/lib/nmatrix/atlas.rb +59 -28
data/spec/00_nmatrix_spec.rb +50 -1
data/spec/02_slice_spec.rb +21 -21
data/spec/blas_spec.rb +25 -3
data/spec/math_spec.rb +233 -5
data/spec/shortcuts_spec.rb +145 -5
data/spec/spec_helper.rb +24 -1
metadata +18 -8

data/ext/nmatrix/math/trsm.h CHANGED

@@ -81,10 +81,14 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
   // (row-major) trsm: left upper trans nonunit m=3 n=1 1/1 a 3 b 3
   if (m == 0 || n == 0) return; /* Quick return if possible. */
+  // Apply necessary offset
+  a -= 1 + lda;
+  b -= 1 + ldb;
   if (alpha == 0) { // Handle alpha == 0
-    for (int j = 0; j < n; ++j) {
-      for (int i = 0; i < m; ++i) {
+    for (int j = 1; j <= n; ++j) {
+      for (int i = 1; i <= m; ++i) {
         b[i + j * ldb] = 0;
       }
     }
@@ -96,37 +100,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
       /* Form  B := alpha*inv( A )*B. */
       if (uplo == CblasUpper) {
-        for (int j = 0; j < n; ++j) {
+        for (int j = 1; j <= n; ++j) {
           if (alpha != 1) {
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + j * ldb] = alpha * b[i + j * ldb];
             }
           }
-          for (int k = m-1; k >= 0; --k) {
+          for (int k = m; k >= 1; --k) {
             if (b[k + j * ldb] != 0) {
               if (diag == CblasNonUnit) {
                 b[k + j * ldb] /= a[k + k * lda];
               }
-              for (int i = 0; i < k-1; ++i) {
+              for (int i = 1; i <= k-1; ++i) {
                 b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
               }
             }
           }
         }
       } else {
-        for (int j = 0; j < n; ++j) {
+        for (int j = 1; j <= n; ++j) {
           if (alpha != 1) {
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + j * ldb] = alpha * b[i + j * ldb];
             }
           }
-          for (int k = 0; k < m; ++k) {
+          for (int k = 1; k <= m; ++k) {
             if (b[k + j * ldb] != 0.) {
               if (diag == CblasNonUnit) {
                 b[k + j * ldb] /= a[k + k * lda];
               }
-              for (int i = k+1; i < m; ++i) {
+              for (int i = k+1; i <= m; ++i) {
                 b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
               }
             }
@@ -137,10 +141,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
       /*           Form  B := alpha*inv( A**T )*B. */
       if (uplo == CblasUpper) {
-        for (int j = 0; j < n; ++j) {
-          for (int i = 0; i < m; ++i) {
+        for (int j = 1; j <= n; ++j) {
+          for (int i = 1; i <= m; ++i) {
             DType temp = alpha * b[i + j * ldb];
-            for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
+            for (int k = 1; k <= i-1; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
               temp -= a[k + i * lda] * b[k + j * ldb];
             }
             if (diag == CblasNonUnit) {
@@ -150,10 +154,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
           }
         }
       } else {
-        for (int j = 0; j < n; ++j) {
-          for (int i = m-1; i >= 0; --i) {
+        for (int j = 1; j <= n; ++j) {
+          for (int i = m; i >= 1; --i) {
             DType temp= alpha * b[i + j * ldb];
-            for (int k = i+1; k < m; ++k) {
+            for (int k = i+1; k <= m; ++k) {
               temp -= a[k + i * lda] * b[k + j * ldb];
             }
             if (diag == CblasNonUnit) {
@@ -171,37 +175,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
       /*           Form  B := alpha*B*inv( A ). */
       if (uplo == CblasUpper) {
-        for (int j = 0; j < n; ++j) {
+        for (int j = 1; j <= n; ++j) {
           if (alpha != 1) {
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + j * ldb] = alpha * b[i + j * ldb];
             }
           }
-          for (int k = 0; k < j-1; ++k) {
+          for (int k = 1; k <= j-1; ++k) {
             if (a[k + j * lda] != 0) {
-              for (int i = 0; i < m; ++i) {
+              for (int i = 1; i <= m; ++i) {
                 b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
               }
             }
           }
           if (diag == CblasNonUnit) {
             DType temp = 1 / a[j + j * lda];
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + j * ldb] = temp * b[i + j * ldb];
             }
           }
         }
       } else {
-        for (int j = n-1; j >= 0; --j) {
+        for (int j = n; j >= 1; --j) {
           if (alpha != 1) {
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + j * ldb] = alpha * b[i + j * ldb];
             }
           }
-          for (int k = j+1; k < n; ++k) {
+          for (int k = j+1; k <= n; ++k) {
             if (a[k + j * lda] != 0.) {
-              for (int i = 0; i < m; ++i) {
+              for (int i = 1; i <= m; ++i) {
                 b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
               }
             }
@@ -209,7 +213,7 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
           if (diag == CblasNonUnit) {
             DType temp = 1 / a[j + j * lda];
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + j * ldb] = temp * b[i + j * ldb];
             }
           }
@@ -220,45 +224,45 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
       /*           Form  B := alpha*B*inv( A**T ). */
       if (uplo == CblasUpper) {
-        for (int k = n-1; k >= 0; --k) {
+        for (int k = n; k >= 1; --k) {
           if (diag == CblasNonUnit) {
             DType temp= 1 / a[k + k * lda];
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + k * ldb] = temp * b[i + k * ldb];
             }
           }
-          for (int j = 0; j < k-1; ++j) {
+          for (int j = 1; j <= k-1; ++j) {
             if (a[j + k * lda] != 0.) {
               DType temp= a[j + k * lda];
-              for (int i = 0; i < m; ++i) {
+              for (int i = 1; i <= m; ++i) {
                 b[i + j * ldb] -= temp * b[i + k *  ldb];
               }
             }
           }
           if (alpha != 1) {
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + k * ldb] = alpha * b[i + k * ldb];
             }
           }
         }
       } else {
-        for (int k = 0; k < n; ++k) {
+        for (int k = 1; k <= n; ++k) {
           if (diag == CblasNonUnit) {
             DType temp = 1 / a[k + k * lda];
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + k * ldb] = temp * b[i + k * ldb];
             }
           }
-          for (int j = k+1; j < n; ++j) {
+          for (int j = k+1; j <= n; ++j) {
             if (a[j + k * lda] != 0.) {
               DType temp = a[j + k * lda];
-              for (int i = 0; i < m; ++i) {
+              for (int i = 1; i <= m; ++i) {
                 b[i + j * ldb] -= temp * b[i + k * ldb];
               }
             }
           }
           if (alpha != 1) {
-            for (int i = 0; i < m; ++i) {
+            for (int i = 1; i <= m; ++i) {
               b[i + k * ldb] = alpha * b[i + k * ldb];
             }
           }

data/ext/nmatrix/math/util.h CHANGED

@@ -70,6 +70,20 @@ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
   return CblasLeft;
 }
+/*
+ * Interprets the LAPACK side argument which could be :left or :right
+ *
+ * Related to obtaining Q in QR factorization after calling lapack_geqrf
+ */
+static inline char lapacke_side_sym(VALUE op) {
+  ID op_id = rb_to_id(op);
+  if (op_id == nm_rb_left)  return 'L';
+  if (op_id == nm_rb_right) return 'R';
+  else rb_raise(rb_eArgError, "Expected :left or :right for side argument");
+  return 'L';
+}
 /*
  * Interprets cblas argument which could be :upper or :lower
  *

data/ext/nmatrix/nmatrix.h CHANGED

@@ -33,6 +33,7 @@
  */
 #include <ruby.h>
+#include "ruby_constants.h"
 #ifdef __cplusplus
   #include <cmath>
@@ -57,6 +58,28 @@
   #include "nm_memory.h"
 #endif
+#ifndef RB_BUILTIN_TYPE
+# define RB_BUILTIN_TYPE(obj) BUILTIN_TYPE(obj)
+#endif
+#ifndef RB_FLOAT_TYPE_P
+/* NOTE: assume flonum doesn't exist */
+# define RB_FLOAT_TYPE_P(obj) ( \
+    (!SPECIAL_CONST_P(obj) && BUILTIN_TYPE(obj) == T_FLOAT))
+#endif
+#ifndef RB_TYPE_P
+# define RB_TYPE_P(obj, type) ( \
+    ((type) == T_FIXNUM) ? FIXNUM_P(obj) : \
+    ((type) == T_TRUE) ? ((obj) == Qtrue) : \
+    ((type) == T_FALSE) ? ((obj) == Qfalse) : \
+    ((type) == T_NIL) ? ((obj) == Qnil) : \
+    ((type) == T_UNDEF) ? ((obj) == Qundef) : \
+    ((type) == T_SYMBOL) ? SYMBOL_P(obj) : \
+    ((type) == T_FLOAT) ? RB_FLOAT_TYPE_P(obj) : \
+    (!SPECIAL_CONST_P(obj) && BUILTIN_TYPE(obj) == (type)))
+#endif
 #ifndef FIX_CONST_VALUE_PTR
 # if defined(__fcc__) || defined(__fcc_version) || \
     defined(__FCC__) || defined(__FCC_VERSION)
@@ -343,11 +366,25 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER);       // };
 #define NM_SRC(val)             (NM_STORAGE(val)->src)
 #define NM_DIM(val)             (NM_STORAGE(val)->dim)
+// Returns an int corresponding the data type of the nmatrix. See the dtype_t
+// enum for a list of possible data types.
 #define NM_DTYPE(val)           (NM_STORAGE(val)->dtype)
+// Returns a number corresponding the storage type of the nmatrix. See the stype_t
+// enum for a list of possible storage types.
 #define NM_STYPE(val)           (NM_STRUCT(val)->stype)
+// Get the shape of the ith dimension (int)
 #define NM_SHAPE(val,i)         (NM_STORAGE(val)->shape[(i)])
+// Get the shape of the 0th dimension (int)
 #define NM_SHAPE0(val)          (NM_STORAGE(val)->shape[0])
+// Get the shape of the 1st dimenension (int)
 #define NM_SHAPE1(val)          (NM_STORAGE(val)->shape[1])
+// Get the default value assigned to the nmatrix.
 #define NM_DEFAULT_VAL(val)     (NM_STORAGE_LIST(val)->default_val)
 // Number of elements in a dense nmatrix.
@@ -366,7 +403,8 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER);       // };
 #define RB_FILE_EXISTS(fn)   (rb_funcall(rb_const_get(rb_cObject, rb_intern("File")), rb_intern("exists?"), 1, (fn)) == Qtrue)
-#define CheckNMatrixType(v)   if (TYPE(v) != T_DATA || (RDATA(v)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(v)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation");
+#define IsNMatrixType(v)  (RB_TYPE_P(v, T_DATA) && (RDATA(v)->dfree == (RUBY_DATA_FUNC)nm_delete || RDATA(v)->dfree == (RUBY_DATA_FUNC)nm_delete_ref))
+#define CheckNMatrixType(v)   if (!IsNMatrixType(v)) rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation");
 #define NM_IsNMatrix(obj) \
   (rb_obj_is_kind_of(obj, cNMatrix) == Qtrue)

data/ext/nmatrix/storage/common.h CHANGED

@@ -34,6 +34,7 @@
 #include <ruby.h>
 #include <cmath> // pow().
+#include <type_traits>
 /*
  * Project Includes
@@ -45,6 +46,11 @@
  * Macros
  */
+#define u_int8_t static_assert(false, "Please use uint8_t for cross-platform support and consistency."); uint8_t
+#define u_int16_t static_assert(false, "Please use uint16_t for cross-platform support and consistency."); uint16_t
+#define u_int32_t static_assert(false, "Please use uint32_t for cross-platform support and consistency."); uint32_t
+#define u_int64_t static_assert(false, "Please use uint64_t for cross-platform support and consistency."); uint64_t
 extern "C" {
 /*
@@ -152,7 +158,7 @@ namespace nm {
   EWOP_INT_INT_DIV(int16_t, int32_t)
   EWOP_INT_INT_DIV(int16_t, int64_t)
   EWOP_INT_INT_DIV(int8_t, int8_t)
-  EWOP_INT_UINT_DIV(int8_t, u_int8_t)
+  EWOP_INT_UINT_DIV(int8_t, uint8_t)
   EWOP_INT_INT_DIV(int8_t, int16_t)
   EWOP_INT_INT_DIV(int8_t, int32_t)
   EWOP_INT_INT_DIV(int8_t, int64_t)
@@ -162,12 +168,12 @@ namespace nm {
   EWOP_UINT_INT_DIV(uint8_t, int32_t)
   EWOP_UINT_INT_DIV(uint8_t, int64_t)
   EWOP_FLOAT_INT_DIV(float, int8_t)
-  EWOP_FLOAT_INT_DIV(float, u_int8_t)
+  EWOP_FLOAT_INT_DIV(float, uint8_t)
   EWOP_FLOAT_INT_DIV(float, int16_t)
   EWOP_FLOAT_INT_DIV(float, int32_t)
   EWOP_FLOAT_INT_DIV(float, int64_t)
   EWOP_FLOAT_INT_DIV(double, int8_t)
-  EWOP_FLOAT_INT_DIV(double, u_int8_t)
+  EWOP_FLOAT_INT_DIV(double, uint8_t)
   EWOP_FLOAT_INT_DIV(double, int16_t)
   EWOP_FLOAT_INT_DIV(double, int32_t)
   EWOP_FLOAT_INT_DIV(double, int64_t)

data/ext/nmatrix/storage/yale/class.h CHANGED

@@ -376,7 +376,7 @@ public:
       v       = reinterpret_cast<D*>(s->elements);
       v_size  = nm_storage_count_max_elements(s);
-    } else if (TYPE(right) == T_ARRAY) {
+    } else if (RB_TYPE_P(right, T_ARRAY)) {
       v_size = RARRAY_LEN(right);
       v      = NM_ALLOC_N(D, v_size);
       if (dtype() == nm::RUBYOBJ) {

data/ext/nmatrix_atlas/extconf.rb CHANGED

@@ -25,74 +25,11 @@
 # This file checks for ATLAS and other necessary headers, and
 # generates a Makefile for compiling NMatrix.
-require "mkmf"
-# Function derived from NArray's extconf.rb.
-def have_type(type, header=nil) #:nodoc:
-  printf "checking for %s... ", type
-  STDOUT.flush
-  src = <<"SRC"
-#include <ruby.h>
-SRC
-  src << <<"SRC" unless header.nil?
-#include <#{header}>
-SRC
-  r = try_link(src + <<"SRC")
-  int main() { return 0; }
-  int t() { #{type} a; return 0; }
-SRC
-  unless r
-    print "no\n"
-    return false
-  end
-  $defs.push(format("-DHAVE_%s", type.upcase))
-  print "yes\n"
-  return true
-end
-# Function derived from NArray's extconf.rb.
-def create_conf_h(file) #:nodoc:
-  print "creating #{file}\n"
-  File.open(file, 'w') do |hfile|
-    header_guard = file.upcase.sub(/\s|\./, '_')
-    hfile.puts "#ifndef #{header_guard}"
-    hfile.puts "#define #{header_guard}"
-    hfile.puts
-    # FIXME: Find a better way to do this:
-    hfile.puts "#define RUBY_2 1" if RUBY_VERSION >= '2.0'
-    for line in $defs
-      line =~ /^-D(.*)/
-      hfile.printf "#define %s 1\n", $1
-    end
-    hfile.puts
-    hfile.puts "#endif"
-  end
-end
-if RUBY_VERSION < '1.9'
-  raise(NotImplementedError, "Sorry, you need at least Ruby 1.9!")
-else
-  #$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
-  if /cygwin|mingw/ =~ RUBY_PLATFORM
-    #$INSTALLFILES << ['libnmatrix.a', '$(archdir)']
-  end
-end
+require "nmatrix/mkmf"
+#$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
 if /cygwin|mingw/ =~ RUBY_PLATFORM
-  CONFIG["DLDFLAGS"] << " --output-lib libnmatrix.a"
+  #$INSTALLFILES << ['libnmatrix.a', '$(archdir)']
 end
 $DEBUG = true
@@ -107,55 +44,6 @@ basenames = %w{nmatrix_atlas math_atlas}
 $objs = basenames.map { |b| "#{b}.o"   }
 $srcs = basenames.map { |b| "#{b}.cpp" }
-def find_newer_gplusplus #:nodoc:
-  print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
-  [9,8,7,6,5,4,3].each do |minor|
-    ver = "4.#{minor}"
-    gpp = "g++-#{ver}"
-    result = `which #{gpp}`
-    next if result.empty?
-    CONFIG['CXX'] = gpp
-    puts ver
-    return CONFIG['CXX']
-  end
-  false
-end
-def gplusplus_version
-  cxxvar = proc { |n| `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{n}`.chomp.split(' ')[2] }
-  major = cxxvar.call('__GNUC__')
-  minor = cxxvar.call('__GNUC_MINOR__')
-  patch = cxxvar.call('__GNUC_PATCHLEVEL__')
-  raise("unable to determine g++ version (match to get version was nil)") if major.nil? || minor.nil? || patch.nil?
-  "#{major}.#{minor}.#{patch}"
-end
-if CONFIG['CXX'] == 'clang++'
-  $CXX_STANDARD = 'c++11'
-else
-  version = gplusplus_version
-  if version < '4.3.0' && CONFIG['CXX'] == 'g++'  # see if we can find a newer G++, unless it's been overridden by user
-    if !find_newer_gplusplus
-      raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
-    end
-    version = gplusplus_version
-  end
-  if version < '4.7.0'
-    $CXX_STANDARD = 'c++0x'
-  else
-    $CXX_STANDARD = 'c++11'
-  end
-  puts "using C++ standard... #{$CXX_STANDARD}"
-  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
-end
-# add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
 # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
 # --with-atlas-lib or --with-atlas-include and tell the compiler where to look
 # for ATLAS. The same for all the others
@@ -226,22 +114,6 @@ have_func("cblas_dgemm", "cblas.h")
 $libs += " -llapack -lcblas -latlas "
 #$libs += " -lprofiler "
-# For release, these next two should both be changed to -O3.
-$CFLAGS += " -O3" #" -O0 -g "
-#$CFLAGS += " -static -O0 -g "
-$CXXFLAGS += " -O3 -std=#{$CXX_STANDARD}" #" -O0 -g -std=#{$CXX_STANDARD} " #-fmax-errors=10 -save-temps
-#$CPPFLAGS += " -static -O0 -g -std=#{$CXX_STANDARD} "
-CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
-CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '')
-CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
-have_func("rb_array_const_ptr", "ruby.h")
-have_macro("FIX_CONST_VALUE_PTR", "ruby.h")
-have_macro("RARRAY_CONST_PTR", "ruby.h")
-have_macro("RARRAY_AREF", "ruby.h")
 create_conf_h("nmatrix_atlas_config.h")
 create_makefile("nmatrix_atlas")