RubyGems - cumo - Versions diffs - 0.4.3 → 0.5.0 - Mend

cumo 0.4.3 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (106)

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.rubocop.yml +15 -0
data/.rubocop_todo.yml +1272 -0
data/3rd_party/mkmf-cu/Gemfile +2 -0
data/3rd_party/mkmf-cu/Rakefile +2 -1
data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
data/CHANGELOG.md +69 -0
data/Gemfile +6 -1
data/README.md +2 -10
data/Rakefile +8 -11
data/bench/broadcast_fp32.rb +28 -26
data/bench/cumo_bench.rb +18 -16
data/bench/numo_bench.rb +18 -16
data/bench/reduction_fp32.rb +14 -12
data/bin/console +1 -0
data/cumo.gemspec +5 -8
data/ext/cumo/cuda/cudnn.c +2 -2
data/ext/cumo/cumo.c +7 -3
data/ext/cumo/depend.erb +15 -13
data/ext/cumo/extconf.rb +32 -46
data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
data/ext/cumo/include/cumo/intern.h +1 -0
data/ext/cumo/include/cumo/narray.h +13 -1
data/ext/cumo/include/cumo/template.h +2 -4
data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
data/ext/cumo/include/cumo/types/float_macro.h +2 -2
data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
data/ext/cumo/include/cumo.h +2 -2
data/ext/cumo/narray/array.c +3 -3
data/ext/cumo/narray/data.c +23 -2
data/ext/cumo/narray/gen/cogen.rb +8 -7
data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
data/ext/cumo/narray/gen/def/bit.rb +3 -1
data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
data/ext/cumo/narray/gen/def/int16.rb +2 -0
data/ext/cumo/narray/gen/def/int32.rb +2 -0
data/ext/cumo/narray/gen/def/int64.rb +2 -0
data/ext/cumo/narray/gen/def/int8.rb +2 -0
data/ext/cumo/narray/gen/def/robject.rb +2 -0
data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
data/ext/cumo/narray/gen/def/uint16.rb +2 -0
data/ext/cumo/narray/gen/def/uint32.rb +2 -0
data/ext/cumo/narray/gen/def/uint64.rb +2 -0
data/ext/cumo/narray/gen/def/uint8.rb +2 -0
data/ext/cumo/narray/gen/erbln.rb +9 -7
data/ext/cumo/narray/gen/erbpp2.rb +26 -24
data/ext/cumo/narray/gen/narray_def.rb +13 -11
data/ext/cumo/narray/gen/spec.rb +58 -55
data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
data/ext/cumo/narray/gen/tmpl/at.c +34 -0
data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
data/ext/cumo/narray/index.c +243 -39
data/ext/cumo/narray/index_kernel.cu +84 -0
data/ext/cumo/narray/narray.c +38 -1
data/ext/cumo/narray/ndloop.c +1 -1
data/ext/cumo/narray/struct.c +1 -1
data/lib/cumo/cuda/compile_error.rb +1 -1
data/lib/cumo/cuda/compiler.rb +23 -22
data/lib/cumo/cuda/cudnn.rb +1 -1
data/lib/cumo/cuda/device.rb +1 -1
data/lib/cumo/cuda/link_state.rb +2 -2
data/lib/cumo/cuda/module.rb +1 -2
data/lib/cumo/cuda/nvrtc_program.rb +3 -2
data/lib/cumo/cuda.rb +2 -0
data/lib/cumo/linalg.rb +2 -0
data/lib/cumo/narray/extra.rb +137 -185
data/lib/cumo/narray.rb +2 -0
data/lib/cumo.rb +3 -1
data/test/bit_test.rb +157 -0
data/test/cuda/compiler_test.rb +69 -0
data/test/cuda/device_test.rb +30 -0
data/test/cuda/memory_pool_test.rb +45 -0
data/test/cuda/nvrtc_test.rb +51 -0
data/test/cuda/runtime_test.rb +28 -0
data/test/cudnn_test.rb +498 -0
data/test/cumo_test.rb +27 -0
data/test/narray_test.rb +745 -0
data/test/ractor_test.rb +52 -0
data/test/test_helper.rb +31 -0
metadata +31 -54
data/.travis.yml +0 -5
data/numo-narray-version +0 -1

data/ext/cumo/narray/gen/spec.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 def_id "cast"
 def_id "eq"
 def_id "ne"
@@ -14,8 +16,8 @@ if is_float
   def_id "copysign"
 end
 if is_int
-  def_id "<<","left_shift"
-  def_id ">>","right_shift"
+  def_id "<<", "left_shift"
+  def_id ">>", "right_shift"
 end
 if is_comparable && !is_object
   def_id "gt"
@@ -42,13 +44,13 @@ if is_object
   def_id "nan?"
   def_id "infinite?"
   def_id "finite?"
-  def_id "==","eq"
-  def_id "!=","ne"
-  def_id ">" ,"gt"
-  def_id ">=","ge"
-  def_id "<" ,"lt"
-  def_id "<=","le"
-  def_id "<=>","ufo"
+  def_id "==", "eq"
+  def_id "!=", "ne"
+  def_id ">" , "gt"
+  def_id ">=", "ge"
+  def_id "<" , "lt"
+  def_id "<=", "le"
+  def_id "<=>", "ufo"
 end
 if (is_float || is_complex) && !is_object
   def_id "gemm"
@@ -119,18 +121,18 @@ def_method "store" do
   store_numeric
   store_from "Bit"
   if is_complex
-    store_from "DComplex","cumo_dcomplex","m_from_dcomplex"
-    store_from "SComplex","cumo_scomplex","m_from_scomplex"
+    store_from "DComplex", "cumo_dcomplex", "m_from_dcomplex"
+    store_from "SComplex", "cumo_scomplex", "m_from_scomplex"
   end
-  store_from "DFloat","double",   "m_from_real"
-  store_from "SFloat","float",    "m_from_real"
+  store_from "DFloat", "double",   "m_from_real"
+  store_from "SFloat", "float",    "m_from_real"
   store_from "Int64", "int64_t",  "m_from_int64"
   store_from "Int32", "int32_t",  "m_from_int32"
   store_from "Int16", "int16_t",  "m_from_sint"
   store_from "Int8",  "int8_t",   "m_from_sint"
-  store_from "UInt64","u_int64_t","m_from_uint64"
-  store_from "UInt32","u_int32_t","m_from_uint32"
-  store_from "UInt16","u_int16_t","m_from_sint"
+  store_from "UInt64", "u_int64_t", "m_from_uint64"
+  store_from "UInt32", "u_int32_t", "m_from_uint32"
+  store_from "UInt16", "u_int16_t", "m_from_sint"
   store_from "UInt8", "u_int8_t", "m_from_sint"
   store_from "RObject", "VALUE",  "m_num_to_data"
   store_array
@@ -144,6 +146,7 @@ def_singleton_method "cast"
 def_method "aref", op:"[]"
 def_method "aref_cpu"
 def_method "aset", op:"[]="
+def_method "at"
 def_method "coerce_cast"
 def_method "to_a"
@@ -167,15 +170,15 @@ if is_bit
   binary "xor", "^"
   binary "eq"
   bit_count "count_true"
-  def_alias "count_1","count_true"
-  def_alias "count","count_true"
+  def_alias "count_1", "count_true"
+  def_alias "count", "count_true"
   bit_count "count_false"
-  def_alias "count_0","count_false"
+  def_alias "count_0", "count_false"
   bit_count_cpu "count_true_cpu"
-  def_alias "count_1_cpu","count_true_cpu"
-  def_alias "count_cpu","count_true_cpu"
+  def_alias "count_1_cpu", "count_true_cpu"
+  def_alias "count_cpu", "count_true_cpu"
   bit_count_cpu "count_false_cpu"
-  def_alias "count_0_cpu","count_false_cpu"
+  def_alias "count_0_cpu", "count_false_cpu"
   bit_reduce "all?", 1
   bit_reduce "any?", 0
   def_method "none?", "none_p"
@@ -215,17 +218,17 @@ if is_complex
   unary2 "real", "rtype", "cRT"
   unary2 "imag", "rtype", "cRT"
   unary2 "arg",  "rtype", "cRT"
-  def_alias "angle","arg"
+  def_alias "angle", "arg"
   set2 "set_imag", "rtype", "cRT"
   set2 "set_real", "rtype", "cRT"
-  def_alias "imag=","set_imag"
-  def_alias "real=","set_real"
+  def_alias "imag=", "set_imag"
+  def_alias "real=", "set_real"
 else
   def_alias "conj", "view"
   def_alias "im", "view"
 end
-def_alias "conjugate","conj"
+def_alias "conjugate", "conj"
 # base_cond
@@ -278,9 +281,9 @@ if is_comparable
   cond_binary "lt"
   cond_binary "le"
   def_alias ">", "gt"
-  def_alias ">=","ge"
+  def_alias ">=", "ge"
   def_alias "<", "lt"
-  def_alias "<=","le"
+  def_alias "<=", "le"
   def_method "clip"
 end
@@ -296,32 +299,32 @@ end
 if is_int
   if is_unsigned
-    accum "sum","u_int64_t","cumo_cUInt64"
-    accum "prod","u_int64_t","cumo_cUInt64"
+    accum "sum", "u_int64_t", "cumo_cUInt64"
+    accum "prod", "u_int64_t", "cumo_cUInt64"
   else
-    accum "sum","int64_t","cumo_cInt64"
-    accum "prod","int64_t","cumo_cInt64"
+    accum "sum", "int64_t", "cumo_cInt64"
+    accum "prod", "int64_t", "cumo_cInt64"
   end
 else
-  accum "sum","dtype","cT"
-  accum "prod","dtype","cT"
+  accum "sum", "dtype", "cT"
+  accum "prod", "dtype", "cT"
 end
 if is_double_precision
-  accum "kahan_sum","dtype","cT"
+  accum "kahan_sum", "dtype", "cT"
 end
 if is_float
-  accum "mean","dtype","cT"
-  accum "stddev","rtype","cRT"
-  accum "var","rtype","cRT"
-  accum "rms","rtype","cRT"
+  accum "mean", "dtype", "cT"
+  accum "stddev", "rtype", "cRT"
+  accum "var", "rtype", "cRT"
+  accum "rms", "rtype", "cRT"
 end
 if is_comparable
-  accum "min","dtype","cT"
-  accum "max","dtype","cT"
-  accum "ptp","dtype","cT"
+  accum "min", "dtype", "cT"
+  accum "max", "dtype", "cT"
+  accum "ptp", "dtype", "cT"
   accum_index "max_index"
   accum_index "min_index"
   def_method "minmax"
@@ -333,8 +336,8 @@ if is_int && !is_object
   def_method "bincount"
 end
-cum "cumsum","add"
-cum "cumprod","mul"
+cum "cumsum", "add"
+cum "cumprod", "mul"
 # dot
 accum_binary "mulsum"
@@ -377,17 +380,17 @@ def_method "poly"
 if is_comparable && !is_object
   if is_float
-    qsort type_name,"dtype","*(dtype*)","_prnan"
-    qsort type_name,"dtype","*(dtype*)","_ignan"
+    qsort type_name, "dtype", "*(dtype*)", "_prnan"
+    qsort type_name, "dtype", "*(dtype*)", "_ignan"
   else
-    qsort type_name,"dtype","*(dtype*)"
+    qsort type_name, "dtype", "*(dtype*)"
   end
   def_method "sort"
   if is_float
-    qsort type_name+"_index","dtype*","**(dtype**)","_prnan"
-    qsort type_name+"_index","dtype*","**(dtype**)","_ignan"
+    qsort type_name + "_index", "dtype*", "**(dtype**)", "_prnan"
+    qsort type_name + "_index", "dtype*", "**(dtype**)", "_ignan"
   else
-    qsort type_name+"_index","dtype*","**(dtype**)"
+    qsort type_name + "_index", "dtype*", "**(dtype**)"
   end
   def_method "sort_index"
   def_method "median"
@@ -407,7 +410,7 @@ def_module do
   set ns_var: "cT"
   set class_name: cn
   set name: "#{nm}_math"
-  set full_module_name: fn+"::NMath"
+  set full_module_name: fn + "::NMath"
   set module_name: "Math"
   set module_var: "mTM"
@@ -433,14 +436,14 @@ def_module do
   math "atanh"
   math "sinc"
   if !is_c
-    math "atan2",2
-    math "hypot",2
+    math "atan2", 2
+    math "hypot", 2
     math "erf"
     math "erfc"
     math "log1p"
     math "expm1"
-    math "ldexp",2
-    math "frexp",1,"frexp"
+    math "ldexp", 2
+    math "frexp", 1, "frexp"
   end
 end
 end

data/ext/cumo/narray/gen/tmpl/alloc_func.c CHANGED Viewed

@@ -85,7 +85,7 @@ static const rb_data_type_t <%=type_name%>_data_type = {
     {0, <%=type_name%>_free, <%=type_name%>_memsize,},
     &cumo_na_data_type,
     &<%=type_name%>_info,
-    0, // flags
+    RUBY_TYPED_FROZEN_SHAREABLE, // flags
 };
 <% end %>

data/ext/cumo/narray/gen/tmpl/at.c ADDED Viewed

@@ -0,0 +1,34 @@
+/*
+  Multi-dimensional array indexing.
+  Same as [] for one-dimensional NArray.
+  Similar to numpy's tuple indexing, i.e., `a[[1,2,..],[3,4,..]]`
+  @overload at(*indices)
+  @param [Numeric,Range,etc] *indices  Multi-dimensional Index Arrays.
+  @return [Cumo::NArray::<%=class_name%>] one-dimensional NArray view.
+  @example
+      x = Cumo::DFloat.new(3,3,3).seq
+      => Cumo::DFloat#shape=[3,3,3]
+       [[[0, 1, 2],
+         [3, 4, 5],
+         [6, 7, 8]],
+        [[9, 10, 11],
+         [12, 13, 14],
+         [15, 16, 17]],
+        [[18, 19, 20],
+         [21, 22, 23],
+         [24, 25, 26]]]
+      x.at([0,1,2],[0,1,2],[-1,-2,-3])
+      => Cumo::DFloat(view)#shape=[3]
+       [2, 13, 24]
+ */
+static VALUE
+<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
+{
+    int result_nd;
+    size_t pos;
+    result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
+    return cumo_na_at_main(argc, argv, self, 0, result_nd, pos);
+}

data/ext/cumo/narray/gen/tmpl/batch_norm.c CHANGED Viewed

@@ -193,7 +193,7 @@ BATCH_NORM_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c CHANGED Viewed

@@ -178,11 +178,11 @@ BATCH_NORM_BACKWARD_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCudnnError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
 {
-    rb_raise(cumo_cuda_eCudnnError, "cuDNN is not available");
+    rb_raise(cumo_cuda_eCUDNNError, "cuDNN is not available");
 }
 #endif // CUDNN_FOUND

data/ext/cumo/narray/gen/tmpl/conv.c CHANGED Viewed

@@ -206,7 +206,7 @@ CONV_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/conv_grad_w.c CHANGED Viewed

@@ -95,6 +95,7 @@ static VALUE
     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[0], ngy->shape[1]);
     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[1], nx->shape[1]);
+#if !defined(NDEBUG)
     {
         // shape check of gy
         size_t *y_shape = ngy->shape;
@@ -105,6 +106,7 @@ static VALUE
                     x_shape[i + 2], sizet_w_shape[i + 2], int_stride[i], int_pad[i]));
         }
     }
+#endif
     x_cont = cumo_na_as_contiguous_array(x);
     gy_cont = cumo_na_as_contiguous_array(gy);
@@ -173,7 +175,7 @@ CONV_GRAD_W_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/conv_transpose.c CHANGED Viewed

@@ -234,7 +234,7 @@ CONV_TRANSPOSE_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c CHANGED Viewed

@@ -139,7 +139,7 @@ FIXED_BATCH_NORM_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/init_class.c CHANGED Viewed

@@ -14,6 +14,7 @@
     rb_hash_aset(hCast, rb_cArray,   cT);
     <% for x in upcast %>
     <%= x %><% end %>
+    rb_obj_freeze(hCast);
     <% @children.each do |m| %>
     <%= m.init_def %><% end %>

data/ext/cumo/narray/gen/tmpl/pooling_backward.c CHANGED Viewed

@@ -126,7 +126,7 @@ POOLING_BACKAWARD_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/pooling_forward.c CHANGED Viewed

@@ -126,7 +126,7 @@ POLLING_FORWARD_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/qsort.c CHANGED Viewed

@@ -76,11 +76,7 @@
         (es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1;
 static inline void
-swapfunc(a, b, n, swaptype)
-     char       *a,
-     *b;
-     size_t          n;
-     int                     swaptype;
+swapfunc(char *a, char *b, size_t n, int swaptype)
 {
     if (swaptype <= 1)
         swapcode(long, a, b, n);

data/ext/cumo/narray/gen/tmpl/sort.c CHANGED Viewed

@@ -32,7 +32,7 @@ static VALUE
 {
     VALUE reduce;
     cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{cumo_sym_reduce,0}};
-    cumo_ndfunc_t ndf = {0, CUMO_STRIDE_LOOP|CUMO_NDF_FLAT_REDUCE, 2,0, ain,0};
+    cumo_ndfunc_t ndf = {0, CUMO_NDF_HAS_LOOP|CUMO_NDF_FLAT_REDUCE, 2,0, ain,0};
     if (!CUMO_TEST_INPLACE(self)) {
         self = cumo_na_copy(self);

data/ext/cumo/narray/gen/tmpl_bit/binary.c CHANGED Viewed

@@ -25,10 +25,8 @@ static void
             CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
         }
     } else {
-        o1 =  p1 % CUMO_NB;
-        o1 -= p3;
-        o2 =  p2 % CUMO_NB;
-        o2 -= p3;
+        o1 =  p1-p3;
+        o2 =  p2-p3;
         l1 =  CUMO_NB+o1;
         r1 =  CUMO_NB-o1;
         l2 =  CUMO_NB+o2;
@@ -58,23 +56,53 @@ static void
             }
         } else {
             for (; n>=CUMO_NB; n-=CUMO_NB) {
-                x = *a1>>o1;
-                if (o1<0)  x |= *(a1-1)>>l1;
-                if (o1>0)  x |= *(a1+1)<<r1;
+                if (o1==0) {
+                    x = *a1;
+                } else if (o1>0) {
+                    x = *a1>>o1  | *(a1+1)<<r1;
+                } else {
+                    x = *a1<<-o1 | *(a1-1)>>l1;
+                }
                 a1++;
-                y = *a2>>o2;
-                if (o2<0)  y |= *(a2-1)>>l2;
-                if (o2>0)  y |= *(a2+1)<<r2;
+                if (o2==0) {
+                    y = *a2;
+                } else if (o2>0) {
+                    y = *a2>>o2  | *(a2+1)<<r2;
+                } else {
+                    y = *a2<<-o2 | *(a2-1)>>l2;
+                }
                 a2++;
                 x = m_<%=name%>(x,y);
                 *(a3++) = x;
             }
         }
         if (n>0) {
-            x = *a1>>o1;
-            if (o1<0)  x |= *(a1-1)>>l1;
-            y = *a2>>o2;
-            if (o2<0)  y |= *(a2-1)>>l2;
+            if (o1==0) {
+                x = *a1;
+            } else if (o1>0) {
+                x = *a1>>o1;
+                if ((int)n>r1) {
+                    x |= *(a1+1)<<r1;
+                }
+            } else {
+                x = *(a1-1)>>l1;
+                if ((int)n>-o1) {
+                    x |= *a1<<-o1;
+                }
+            }
+            if (o2==0) {
+                y = *a2;
+            } else if (o2>0) {
+                y = *a2>>o2;
+                if ((int)n>r2) {
+                    y |= *(a2+1)<<r2;
+                }
+            } else {
+                y = *(a2-1)>>l2;
+                if ((int)n>-o2) {
+                    y |= *a2<<-o2;
+                }
+            }
             x = m_<%=name%>(x,y);
             *a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
         }

data/ext/cumo/narray/gen/tmpl_bit/bit_count.c CHANGED Viewed

@@ -53,10 +53,15 @@ static VALUE
         return <%=c_func(-1)%>_cpu(argc, argv, self);
     } else {
         VALUE v, reduce;
+        cumo_narray_t *na;
         cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
         cumo_ndfunc_arg_out_t aout[1] = {{cumo_cUInt64,0}};
         cumo_ndfunc_t ndf = { <%=c_iter%>, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
+        CumoGetNArray(self,na);
+        if (CUMO_NA_SIZE(na)==0) {
+            return INT2FIX(0);
+        }
         reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
         v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
         return v;

data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c CHANGED Viewed

@@ -111,10 +111,15 @@ static VALUE
 <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
 {
     VALUE v, reduce;
+    cumo_narray_t *na;
     cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
     cumo_ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}};
     cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
+    CumoGetNArray(self,na);
+    if (CUMO_NA_SIZE(na)==0) {
+        return INT2FIX(0);
+    }
     reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
     v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(<%=init_bit%>));
     if (argc > 0) {

data/ext/cumo/narray/gen/tmpl_bit/mask.c CHANGED Viewed

@@ -78,6 +78,10 @@ static void
 #define cIndex cumo_cInt32
 #endif
+static void shape_error(void) {
+    rb_raise(cumo_na_eShapeError,"mask and masked arrays must have the same shape");
+}
 /*
   Return subarray of argument masked with self bit array.
   @overload <%=op_map%>(array)
@@ -87,17 +91,33 @@ static void
 static VALUE
 <%=c_func(1)%>(VALUE mask, VALUE val)
 {
-    volatile VALUE idx_1, view;
+    int i;
+    VALUE idx_1, view;
     cumo_narray_data_t *nidx;
-    cumo_narray_view_t *nv;
-    cumo_narray_t      *na;
-    cumo_narray_view_t *na1;
+    cumo_narray_view_t *nv, *nv_val;
+    cumo_narray_t      *na, *na_mask;
     cumo_stridx_t stridx0;
     size_t n_1;
     where_opt_t g;
     cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{Qnil,0}};
     cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_FULL_LOOP, 2, 0, ain, 0};
+    // cast val to NArray
+    if (!rb_obj_is_kind_of(val, cumo_cNArray)) {
+        val = rb_funcall(cumo_cNArray, cumo_id_cast, 1, val);
+    }
+    // shapes of mask and val must be same
+    CumoGetNArray(val, na);
+    CumoGetNArray(mask, na_mask);
+    if (na_mask->ndim != na->ndim) {
+        shape_error();
+    }
+    for (i=0; i<na->ndim; i++) {
+        if (na_mask->shape[i] != na->shape[i]) {
+            shape_error();
+        }
+    }
     // TODO(sonots): bit_count_true synchronizes with CPU. Avoid.
     n_1 = NUM2SIZET(<%=find_tmpl("count_true_cpu").c_func%>(0, NULL, mask));
     idx_1 = cumo_na_new(cIndex, 1, &n_1);
@@ -114,19 +134,19 @@ static VALUE
     CumoGetNArrayData(idx_1,nidx);
     CUMO_SDX_SET_INDEX(stridx0,(size_t*)nidx->ptr);
     nidx->ptr = NULL;
+    RB_GC_GUARD(idx_1);
     nv->stridx = ALLOC_N(cumo_stridx_t,1);
     nv->stridx[0] = stridx0;
     nv->offset = 0;
-    CumoGetNArray(val, na);
     switch(CUMO_NA_TYPE(na)) {
     case CUMO_NARRAY_DATA_T:
         nv->data = val;
         break;
     case CUMO_NARRAY_VIEW_T:
-        CumoGetNArrayView(val, na1);
-        nv->data = na1->data;
+        CumoGetNArrayView(val, nv_val);
+        nv->data = nv_val->data;
         break;
     default:
         rb_raise(rb_eRuntimeError,"invalid CUMO_NA_TYPE: %d",CUMO_NA_TYPE(na));

data/ext/cumo/narray/gen/tmpl_bit/store_bit.c CHANGED Viewed

@@ -22,8 +22,7 @@ static void
             CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
         }
     } else {
-        o1 =  p1 % CUMO_NB;
-        o1 -= p3;
+        o1 =  p1-p3;
         l1 =  CUMO_NB+o1;
         r1 =  CUMO_NB-o1;
         if (p3>0 || n<CUMO_NB) {
@@ -44,16 +43,31 @@ static void
             }
         } else {
             for (; n>=CUMO_NB; n-=CUMO_NB) {
-                x = *a1>>o1;
-                if (o1<0)  x |= *(a1-1)>>l1;
-                if (o1>0)  x |= *(a1+1)<<r1;
+                if (o1==0) {
+                    x = *a1;
+                } else if (o1>0) {
+                    x = *a1>>o1  | *(a1+1)<<r1;
+                } else {
+                    x = *a1<<-o1 | *(a1-1)>>l1;
+                }
                 a1++;
                 *(a3++) = x;
             }
         }
         if (n>0) {
-            x = *a1>>o1;
-            if (o1<0)  x |= *(a1-1)>>l1;
+            if (o1==0) {
+                x = *a1;
+            } else if (o1>0) {
+                x = *a1>>o1;
+                if ((int)n>r1) {
+                    x |= *(a1+1)<<r1;
+                }
+            } else {
+                x = *(a1-1)>>l1;
+                if ((int)n>-o1) {
+                    x |= *a1<<-o1;
+                }
+            }
             *a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
         }
     }