RubyGems - numo-narray - Versions diffs - 0.9.1.2 → 0.9.1.3 - Mend

numo-narray 0.9.1.2 → 0.9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

checksums.yaml +4 -4
data/Rakefile +7 -1
data/ext/numo/narray/array.c +6 -6
data/ext/numo/narray/data.c +8 -8
data/ext/numo/narray/depend.erb +4 -4
data/ext/numo/narray/extconf.rb +2 -2
data/ext/numo/narray/gen/cogen.rb +13 -0
data/ext/numo/narray/gen/def/dfloat.rb +1 -0
data/ext/numo/narray/gen/def/sfloat.rb +1 -0
data/ext/numo/narray/gen/narray_def.rb +14 -2
data/ext/numo/narray/gen/spec.rb +26 -10
data/ext/numo/narray/gen/tmpl/accum_binary.c +1 -1
data/ext/numo/narray/gen/tmpl/accum_index.c +11 -1
data/ext/numo/narray/gen/tmpl/alloc_func.c +3 -3
data/ext/numo/narray/gen/tmpl/binary.c +149 -10
data/ext/numo/narray/gen/tmpl/binary2.c +1 -1
data/ext/numo/narray/gen/tmpl/bincount.c +1 -1
data/ext/numo/narray/gen/tmpl/cast.c +1 -1
data/ext/numo/narray/gen/tmpl/cond_binary.c +1 -1
data/ext/numo/narray/gen/tmpl/each.c +1 -1
data/ext/numo/narray/gen/tmpl/each_with_index.c +1 -1
data/ext/numo/narray/gen/tmpl/extract_data.c +3 -3
data/ext/numo/narray/gen/tmpl/inspect.c +1 -1
data/ext/numo/narray/gen/tmpl/lib.c +5 -0
data/ext/numo/narray/gen/tmpl/map_with_index.c +1 -1
data/ext/numo/narray/gen/tmpl/median.c +3 -2
data/ext/numo/narray/gen/tmpl/pow.c +1 -1
data/ext/numo/narray/gen/tmpl/qsort.c +118 -56
data/ext/numo/narray/gen/tmpl/store.c +4 -4
data/ext/numo/narray/gen/tmpl/store_bit.c +4 -4
data/ext/numo/narray/gen/tmpl/to_a.c +1 -1
data/ext/numo/narray/gen/tmpl/unary_s.c +55 -9
data/ext/numo/narray/gen/tmpl_bit/each.c +1 -1
data/ext/numo/narray/gen/tmpl_bit/each_with_index.c +1 -1
data/ext/numo/narray/gen/tmpl_bit/inspect.c +1 -1
data/ext/numo/narray/gen/tmpl_bit/mask.c +1 -1
data/ext/numo/narray/gen/tmpl_bit/to_a.c +1 -1
data/ext/numo/narray/index.c +64 -37
data/ext/numo/narray/math.c +4 -4
data/ext/numo/narray/narray.c +54 -29
data/ext/numo/narray/ndloop.c +7 -7
data/ext/numo/narray/numo/narray.h +9 -2
data/ext/numo/narray/numo/template.h +18 -0
data/ext/numo/narray/numo/types/bit.h +5 -0
data/ext/numo/narray/numo/types/complex_macro.h +5 -0
data/ext/numo/narray/numo/types/float_macro.h +5 -0
data/ext/numo/narray/numo/types/int_macro.h +24 -0
data/ext/numo/narray/numo/types/robj_macro.h +5 -0
data/ext/numo/narray/numo/types/uint_macro.h +24 -0
data/ext/numo/narray/numo/types/xint_macro.h +5 -25
data/ext/numo/narray/rand.c +2 -29
data/ext/numo/narray/step.c +1 -28
data/ext/numo/narray/struct.c +26 -22
data/lib/numo/narray/extra.rb +50 -1
metadata +2 -2

data/ext/numo/narray/gen/tmpl/binary2.c CHANGED

@@ -48,7 +48,7 @@ static VALUE
     return <%=c_func%>_self(self, other);
     <% else %>
     VALUE klass, v;
-    klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+    klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
     if (klass==cT) {
         return <%=c_func%>_self(self, other);
     } else {

data/ext/numo/narray/gen/tmpl/bincount.c CHANGED

@@ -170,7 +170,7 @@ static VALUE
             return <%=c_func%>_32(self, length);
         }
     } else {
-        wclass = CLASS_OF(weight);
+        wclass = rb_obj_class(weight);
         if (wclass == numo_cSFloat) {
             return <%=c_func%>_sf(self, weight, length);
         } else {

data/ext/numo/narray/gen/tmpl/cast.c CHANGED

@@ -17,7 +17,7 @@ static VALUE
     narray_t *na;
     dtype x;
-    if (CLASS_OF(obj)==cT) {
+    if (rb_obj_class(obj)==cT) {
         return obj;
     }
     if (RTEST(rb_obj_is_kind_of(obj,rb_cNumeric))) {

data/ext/numo/narray/gen/tmpl/cond_binary.c CHANGED

@@ -44,7 +44,7 @@ static VALUE
     return <%=c_func%>_self(self, other);
     <% else %>
     VALUE klass, v;
-    klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+    klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
     if (klass==cT) {
         return <%=c_func%>_self(self, other);
     } else {

data/ext/numo/narray/gen/tmpl/each.c CHANGED

@@ -1,4 +1,4 @@
-void
+static void
 <%=c_iter%>(na_loop_t *const lp)
 {
     size_t i, s1;

data/ext/numo/narray/gen/tmpl/each_with_index.c CHANGED

@@ -11,7 +11,7 @@ yield_each_with_index(dtype x, size_t *c, VALUE *a, int nd, int md)
 }
-void
+static void
 <%=c_iter%>(na_loop_t *const lp)
 {
     size_t i, s1;

data/ext/numo/narray/gen/tmpl/extract_data.c CHANGED

@@ -15,7 +15,7 @@ static dtype
         if (na->size != 1) {
             rb_raise(nary_eShapeError,"narray size should be 1");
         }
-        klass = CLASS_OF(obj);
+        klass = rb_obj_class(obj);
         ptr = na_get_pointer_for_read(obj);
         pos = na_get_offset(obj);
         <% find_tmpl("store").definitions.select{|x| x.class==Store}.each do |x| %>
@@ -27,14 +27,14 @@ static dtype
         // coerce
         r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
-        if (CLASS_OF(r)==cT) {
+        if (rb_obj_class(r)==cT) {
             return <%=c_func%>(r);
         }
         <% if is_object %>
         return obj;
         <% else %>
         rb_raise(nary_eCastError, "unknown conversion from %s to %s",
-                 rb_class2name(CLASS_OF(obj)),
+                 rb_class2name(rb_obj_class(obj)),
                  rb_class2name(cT));
         <% end %>
     }

data/ext/numo/narray/gen/tmpl/inspect.c CHANGED

@@ -13,7 +13,7 @@ static VALUE
   @overload inspect
   @return [String]
 */
-VALUE
+static VALUE
 <%=c_func(0)%>(VALUE ary)
 {
     return na_ndloop_inspect(ary, <%=c_iter%>, Qnil);

data/ext/numo/narray/gen/tmpl/lib.c CHANGED

@@ -14,6 +14,11 @@
 #define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
+<% if is_simd %>
+#include <emmintrin.h>
+#define SIMD_ALIGNMENT_SIZE 16
+<% end %>
 <% id_decl.each do |x| %>
 <%= x %>
 <% end %>

data/ext/numo/narray/gen/tmpl/map_with_index.c CHANGED

@@ -12,7 +12,7 @@ yield_map_with_index(dtype x, size_t *c, VALUE *a, int nd, int md)
     return m_num_to_data(y);
 }
-void
+static void
 <%=c_iter%>(na_loop_t *const lp)
 {
     size_t  i;

data/ext/numo/narray/gen/tmpl/median.c CHANGED

@@ -47,7 +47,7 @@ static void
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
 {
-    VALUE reduce;
+    VALUE v, reduce;
     ndfunc_arg_in_t ain[2] = {{OVERWRITE,0},{sym_reduce,0}};
     ndfunc_arg_out_t aout[1] = {{INT2FIX(0),0}};
     ndfunc_t ndf = {0, NDF_HAS_LOOP|NDF_FLAT_REDUCE, 2,1, ain,aout};
@@ -60,5 +60,6 @@ static VALUE
     ndf.func = <%=c_iter%>;
     reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
   <% end %>
-    return na_ndloop(&ndf, 2, self, reduce);
+    v = na_ndloop(&ndf, 2, self, reduce);
+    return <%=type_name%>_extract(v);
 }

data/ext/numo/narray/gen/tmpl/pow.c CHANGED

@@ -67,7 +67,7 @@ static VALUE
     return <%=c_func%>_self(self,other);
     <% else %>
     VALUE klass, v;
-    klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+    klass = na_upcast(rb_obj_class(self),rb_obj_class(other));
     if (klass==cT) {
         return <%=c_func%>_self(self,other);
     } else {

data/ext/numo/narray/gen/tmpl/qsort.c CHANGED

@@ -53,12 +53,52 @@
 #define QSORT_INCL
 #define Min(x, y)               ((x) < (y) ? (x) : (y))
-#define swap(type,a,b) \
-    do {type tmp=*(type*)(a); *(type*)(a)=*(type*)(b); *(type*)(b)=tmp;} while(0)
+/*
+ * Qsort routine based on J. L. Bentley and M. D. McIlroy,
+ * "Engineering a sort function",
+ * Software--Practice and Experience 23 (1993) 1249-1265.
+ * We have modified their original by adding a check for already-sorted input,
+ * which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
+ */
+#define swapcode(TYPE, parmi, parmj, n)         \
+    do {                                        \
+        size_t i = (n) / sizeof (TYPE);         \
+        TYPE *pi = (TYPE *)(void *)(parmi);     \
+        TYPE *pj = (TYPE *)(void *)(parmj);     \
+        do {                                    \
+            TYPE    t = *pi;                    \
+            *pi++ = *pj;                        \
+            *pj++ = t;                          \
+        } while (--i > 0);                      \
+    } while (0)
+#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \
+        (es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1;
+static inline void
+swapfunc(a, b, n, swaptype)
+     char       *a,
+     *b;
+     size_t          n;
+     int                     swaptype;
+{
+    if (swaptype <= 1)
+        swapcode(long, a, b, n);
+    else
+        swapcode(char, a, b, n);
+}
-#define vecswap(type, a, b, n) if ((n)>0) swap(type,(a),(b))
+#define swap(a, b)                                      \
+    if (swaptype == 0) {                                \
+        long t = *(long *)(void *)(a);                  \
+        *(long *)(void *)(a) = *(long *)(void *)(b);    \
+        *(long *)(void *)(b) = t;                       \
+    } else                                              \
+        swapfunc(a, b, es, swaptype)
-#define MED3(a,b,c)                                     \
+#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype)
+#define med3(a,b,c,_cmp)                                \
     (cmpgt(b,a) ?                                       \
      (cmpgt(c,b) ? b : (cmpgt(c,a) ? c : a))            \
      : (cmpgt(b,c) ? b : (cmpgt(c,a) ? a : c)))
@@ -76,75 +116,97 @@
 <% end %>
 <% c_func(:nodef)%>
-void
+static void
 <%=type_name%>_qsort<%=suffix%>(void *a, size_t n, ssize_t es)
 {
-    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-    int  d, r, presorted;
+    char       *pa,
+        *pb,
+        *pc,
+        *pd,
+        *pl,
+        *pm,
+        *pn;
+    int                     d,
+        r,
+        swaptype,
+        presorted;
- loop:
-    if (n < 7) {
-        for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
-            for (pl = pm; pl > (char *) a && cmpgt(pl - es, pl);
-                 pl -= es)
-                swap(qsort_dtype, pl, pl - es);
-        return;
-    }
+ loop:SWAPINIT(a, es);
+    if (n < 7)
+        {
+            for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
+                for (pl = pm; pl > (char *) a && cmpgt(pl - es, pl);
+                     pl -= es)
+                    swap(pl, pl - es);
+            return;
+        }
     presorted = 1;
-    for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) {
-        if (cmpgt(pm - es, pm)) {
-            presorted = 0;
-            break;
+    for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
+        {
+            if (cmpgt(pm - es, pm))
+                {
+                    presorted = 0;
+                    break;
+                }
         }
-    }
     if (presorted)
         return;
     pm = (char *) a + (n / 2) * es;
-    if (n > 7) {
-        pl = (char *) a;
-        pn = (char *) a + (n - 1) * es;
-        if (n > 40) {
-            d = (n / 8) * es;
-            pl = MED3(pl, pl + d, pl + 2 * d);
-            pm = MED3(pm - d, pm, pm + d);
-            pn = MED3(pn - 2 * d, pn - d, pn);
+    if (n > 7)
+        {
+            pl = (char *) a;
+            pn = (char *) a + (n - 1) * es;
+            if (n > 40)
+                {
+                    d = (n / 8) * es;
+                    pl = med3(pl, pl + d, pl + 2 * d, cmp);
+                    pm = med3(pm - d, pm, pm + d, cmp);
+                    pn = med3(pn - 2 * d, pn - d, pn, cmp);
+                }
+            pm = med3(pl, pm, pn, cmp);
         }
-        pm = MED3(pl, pm, pn);
-    }
-    swap(qsort_dtype, a, pm);
+    swap(a, pm);
     pa = pb = (char *) a + es;
     pc = pd = (char *) a + (n - 1) * es;
-    for (;;) {
-        while (pb <= pc && (r = cmp(pb, a)) <= 0) {
-            if (r == 0) {
-                swap(qsort_dtype, pa, pb);
-                pa += es;
-            }
+    for (;;)
+        {
+            while (pb <= pc && (r = cmp(pb, a)) <= 0)
+                {
+                    if (r == 0)
+                        {
+                            swap(pa, pb);
+                            pa += es;
+                        }
+                    pb += es;
+                }
+            while (pb <= pc && (r = cmp(pc, a)) >= 0)
+                {
+                    if (r == 0)
+                        {
+                            swap(pc, pd);
+                            pd -= es;
+                        }
+                    pc -= es;
+                }
+            if (pb > pc)
+                break;
+            swap(pb, pc);
             pb += es;
-        }
-        while (pb <= pc && (r = cmp(pc, a)) >= 0) {
-            if (r == 0) {
-                swap(qsort_dtype, pc, pd);
-                pd -= es;
-            }
             pc -= es;
         }
-        if (pb > pc)
-            break;
-        swap(qsort_dtype, pb, pc);
-        pb += es;
-        pc -= es;
-    }
     pn = (char *) a + n * es;
     r = Min(pa - (char *) a, pb - pa);
-    vecswap(qsort_dtype, a, pb - r, r);
+    vecswap(a, pb - r, r);
     r = Min(pd - pc, pn - pd - es);
-    vecswap(qsort_dtype, pb, pn - r, r);
+    vecswap(pb, pn - r, r);
     if ((r = pb - pa) > es)
         <%=type_name%>_qsort<%=suffix%>(a, r / es, es);
-    if ((r = pd - pc) > es) {
-        a = pn - r;
-        n = r / es;
-        goto loop;
-    }
+    if ((r = pd - pc) > es)
+        {
+            /* Iterate rather than recurse to save stack space */
+            a = pn - r;
+            n = r / es;
+            goto loop;
+        }
+    /*              qsort(pn - r, r / es, es, cmp);*/
 }

data/ext/numo/narray/gen/tmpl/store.c CHANGED

@@ -13,7 +13,7 @@ static VALUE
 {
     VALUE r, klass;
-    klass = CLASS_OF(obj);
+    klass = rb_obj_class(obj);
     <% definitions.each do |x| %>
     if (<%=x.condition("klass")%>) {
@@ -24,7 +24,7 @@ static VALUE
     if (IsNArray(obj)) {
         r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
-        if (CLASS_OF(r)==cT) {
+        if (rb_obj_class(r)==cT) {
             <%=c_func%>(self,r);
             return self;
         }
@@ -34,8 +34,8 @@ static VALUE
     robject_store_numeric(self,obj);
     <% else %>
     rb_raise(nary_eCastError, "unknown conversion from %s to %s",
-             rb_class2name(CLASS_OF(obj)),
-             rb_class2name(CLASS_OF(self)));
+             rb_class2name(rb_obj_class(obj)),
+             rb_class2name(rb_obj_class(self)));
     <% end %>
     return self;
 }

data/ext/numo/narray/gen/tmpl/store_bit.c CHANGED

@@ -16,13 +16,13 @@ static void
         if (idx1) {
             for (; i--;) {
                 LOAD_BIT(a2, p2+*idx2, x); idx2++;
-                y = m_from_real(x);
+                y = m_from_sint(x);
                 SET_DATA_INDEX(p1,idx1,dtype,y);
             }
         } else {
             for (; i--;) {
                 LOAD_BIT(a2, p2+*idx2, x); idx2++;
-                y = m_from_real(x);
+                y = m_from_sint(x);
                 SET_DATA_STRIDE(p1,s1,dtype,y);
             }
         }
@@ -30,13 +30,13 @@ static void
         if (idx1) {
             for (; i--;) {
                 LOAD_BIT(a2, p2, x); p2 += s2;
-                y = m_from_real(x);
+                y = m_from_sint(x);
                 SET_DATA_INDEX(p1,idx1,dtype,y);
             }
         } else {
             for (; i--;) {
                 LOAD_BIT(a2, p2, x); p2 += s2;
-                y = m_from_real(x);
+                y = m_from_sint(x);
                 SET_DATA_STRIDE(p1,s1,dtype,y);
             }
         }

data/ext/numo/narray/gen/tmpl/to_a.c CHANGED

@@ -1,4 +1,4 @@
-void
+static void
 <%=c_iter%>(na_loop_t *const lp)
 {
     size_t i, s1;

data/ext/numo/narray/gen/tmpl/unary_s.c CHANGED

@@ -1,23 +1,36 @@
 static void
 <%=c_iter%>(na_loop_t *const lp)
 {
-    size_t  i;
+    size_t  i=0, n;
     char   *p1, *p2;
     ssize_t s1, s2;
     size_t *idx1, *idx2;
     dtype   x;
-    INIT_COUNTER(lp, i);
+<% if is_simd and !is_complex and %w[sqrt].include? name %>
+    size_t cnt;
+    size_t cnt_simd_loop = -1;
+    <% if is_double_precision %>
+    __m128d a;
+    <% else %>
+    __m128 a;
+    <% end %>
+    size_t num_pack; // Number of elements packed for SIMD.
+    num_pack = SIMD_ALIGNMENT_SIZE / sizeof(dtype);
+<% end %>
+    INIT_COUNTER(lp, n);
     INIT_PTR_IDX(lp, 0, p1, s1, idx1);
     INIT_PTR_IDX(lp, 1, p2, s2, idx2);
     if (idx1) {
         if (idx2) {
-            for (; i--;) {
+            for (i=0; i<n; i++) {
                 GET_DATA_INDEX(p1,idx1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_INDEX(p2,idx2,dtype,x);
             }
         } else {
-            for (; i--;) {
+            for (i=0; i<n; i++) {
                 GET_DATA_INDEX(p1,idx1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_STRIDE(p2,s2,dtype,x);
@@ -25,17 +38,50 @@ static void
         }
     } else {
         if (idx2) {
-            for (; i--;) {
+            for (i=0; i<n; i++) {
                 GET_DATA_STRIDE(p1,s1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_INDEX(p2,idx2,dtype,x);
             }
         } else {
-            for (; i--;) {
-                GET_DATA_STRIDE(p1,s1,dtype,x);
-                x = m_<%=name%>(x);
-                SET_DATA_STRIDE(p2,s2,dtype,x);
+<% if is_simd and !is_complex and %w[sqrt].include? name %>
+            // Check number of elements. & Check same alignment.
+            if ((n >= num_pack) && is_same_aligned2(&((dtype*)p1)[i], &((dtype*)p2)[i], SIMD_ALIGNMENT_SIZE)){
+                // Calculate up to the position just before the start of SIMD computation.
+                cnt = get_count_of_elements_not_aligned_to_simd_size(&((dtype*)p1)[i], SIMD_ALIGNMENT_SIZE, sizeof(dtype));
+                for (i=0; i < cnt; i++) {
+                    ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
+                }
+                // Get the count of SIMD computation loops.
+                cnt_simd_loop = (n - i) % num_pack;
+                // SIMD computation.
+                if (p1 == p2) { // inplace case
+                    for(; i < n - cnt_simd_loop; i += num_pack){
+                        a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
+                        a = _mm_<%=name%>_<%=simd_type%>(a);
+                        _mm_store_<%=simd_type%>(&((dtype*)p1)[i], a);
+                    }
+                } else {
+                    for(; i < n - cnt_simd_loop; i += num_pack){
+                        a = _mm_load_<%=simd_type%>(&((dtype*)p1)[i]);
+                        a = _mm_<%=name%>_<%=simd_type%>(a);
+                        _mm_stream_<%=simd_type%>(&((dtype*)p2)[i], a);
+                    }
+                }
+            }
+            // Compute the remainder of the SIMD operation.
+            if (cnt_simd_loop != 0){
+<% end %>
+                for (; i<n; i++) {
+                    ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
+                }
+<% if is_simd and !is_complex and %w[sqrt].include? name %>
             }
+<% end %>
+            return;
         }
     }
 }