RubyGems - cumo - Versions diffs - 0.4.3 → 0.5.0 - Mend

cumo 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.rubocop.yml +15 -0
data/.rubocop_todo.yml +1272 -0
data/3rd_party/mkmf-cu/Gemfile +2 -0
data/3rd_party/mkmf-cu/Rakefile +2 -1
data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +36 -7
data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
data/CHANGELOG.md +69 -0
data/Gemfile +6 -1
data/README.md +2 -10
data/Rakefile +8 -11
data/bench/broadcast_fp32.rb +28 -26
data/bench/cumo_bench.rb +18 -16
data/bench/numo_bench.rb +18 -16
data/bench/reduction_fp32.rb +14 -12
data/bin/console +1 -0
data/cumo.gemspec +5 -8
data/ext/cumo/cuda/cudnn.c +2 -2
data/ext/cumo/cumo.c +7 -3
data/ext/cumo/depend.erb +15 -13
data/ext/cumo/extconf.rb +32 -46
data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
data/ext/cumo/include/cumo/intern.h +1 -0
data/ext/cumo/include/cumo/narray.h +13 -1
data/ext/cumo/include/cumo/template.h +2 -4
data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
data/ext/cumo/include/cumo/types/float_macro.h +2 -2
data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
data/ext/cumo/include/cumo.h +2 -2
data/ext/cumo/narray/array.c +3 -3
data/ext/cumo/narray/data.c +23 -2
data/ext/cumo/narray/gen/cogen.rb +8 -7
data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
data/ext/cumo/narray/gen/def/bit.rb +3 -1
data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
data/ext/cumo/narray/gen/def/int16.rb +2 -0
data/ext/cumo/narray/gen/def/int32.rb +2 -0
data/ext/cumo/narray/gen/def/int64.rb +2 -0
data/ext/cumo/narray/gen/def/int8.rb +2 -0
data/ext/cumo/narray/gen/def/robject.rb +2 -0
data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
data/ext/cumo/narray/gen/def/uint16.rb +2 -0
data/ext/cumo/narray/gen/def/uint32.rb +2 -0
data/ext/cumo/narray/gen/def/uint64.rb +2 -0
data/ext/cumo/narray/gen/def/uint8.rb +2 -0
data/ext/cumo/narray/gen/erbln.rb +9 -7
data/ext/cumo/narray/gen/erbpp2.rb +26 -24
data/ext/cumo/narray/gen/narray_def.rb +13 -11
data/ext/cumo/narray/gen/spec.rb +58 -55
data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
data/ext/cumo/narray/gen/tmpl/at.c +34 -0
data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -1
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +2 -2
data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +1 -1
data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
data/ext/cumo/narray/gen/tmpl/sort.c +1 -1
data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
data/ext/cumo/narray/index.c +243 -39
data/ext/cumo/narray/index_kernel.cu +84 -0
data/ext/cumo/narray/narray.c +38 -1
data/ext/cumo/narray/ndloop.c +1 -1
data/ext/cumo/narray/struct.c +1 -1
data/lib/cumo/cuda/compile_error.rb +1 -1
data/lib/cumo/cuda/compiler.rb +23 -22
data/lib/cumo/cuda/cudnn.rb +1 -1
data/lib/cumo/cuda/device.rb +1 -1
data/lib/cumo/cuda/link_state.rb +2 -2
data/lib/cumo/cuda/module.rb +1 -2
data/lib/cumo/cuda/nvrtc_program.rb +3 -2
data/lib/cumo/cuda.rb +2 -0
data/lib/cumo/linalg.rb +2 -0
data/lib/cumo/narray/extra.rb +137 -185
data/lib/cumo/narray.rb +2 -0
data/lib/cumo.rb +3 -1
data/test/bit_test.rb +157 -0
data/test/cuda/compiler_test.rb +69 -0
data/test/cuda/device_test.rb +30 -0
data/test/cuda/memory_pool_test.rb +45 -0
data/test/cuda/nvrtc_test.rb +51 -0
data/test/cuda/runtime_test.rb +28 -0
data/test/cudnn_test.rb +498 -0
data/test/cumo_test.rb +27 -0
data/test/narray_test.rb +745 -0
data/test/ractor_test.rb +52 -0
data/test/test_helper.rb +31 -0
metadata +31 -54
data/.travis.yml +0 -5
data/numo-narray-version +0 -1

data/lib/cumo/narray/extra.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module Cumo
   class NArray
@@ -23,12 +25,12 @@ module Cumo
     # Convert angles from radians to degrees.
     def rad2deg
-      self * (180/Math::PI)
+      self * (180 / Math::PI)
     end
     # Convert angles from degrees to radians.
     def deg2rad
-      self * (Math::PI/180)
+      self * (Math::PI / 180)
     end
     # Flip each row in the left/right direction.
@@ -43,56 +45,6 @@ module Cumo
       reverse(0)
     end
-    # Multi-dimensional array indexing.
-    # Same as [] for one-dimensional NArray.
-    # Similar to numpy's tuple indexing, i.e., `a[[1,2,..],[3,4,..]]`
-    # (This method will be rewritten in C)
-    # @return [Cumo::NArray] one-dimensional view of self.
-    # @example
-    #   p x = Cumo::DFloat.new(3,3,3).seq
-    #   # Cumo::DFloat#shape=[3,3,3]
-    #   # [[[0, 1, 2],
-    #   #   [3, 4, 5],
-    #   #   [6, 7, 8]],
-    #   #  [[9, 10, 11],
-    #   #   [12, 13, 14],
-    #   #   [15, 16, 17]],
-    #   #  [[18, 19, 20],
-    #   #   [21, 22, 23],
-    #   #   [24, 25, 26]]]
-    #
-    #   p x.at([0,1,2],[0,1,2],[-1,-2,-3])
-    #   # Cumo::DFloat(view)#shape=[3]
-    #   # [2, 13, 24]
-    def at(*indices)
-      if indices.size != ndim
-        raise DimensionError, "argument length does not match dimension size"
-      end
-      idx = nil
-      stride = 1
-      (indices.size-1).downto(0) do |i|
-        ix = Int64.cast(indices[i])
-        if ix.ndim != 1
-          raise DimensionError, "index array is not one-dimensional"
-        end
-        ix[ix < 0] += shape[i]
-        if ((ix < 0) & (ix >= shape[i])).any?
-          raise IndexError, "index array is out of range"
-        end
-        if idx
-          if idx.size != ix.size
-            raise ShapeError, "index array sizes mismatch"
-          end
-          idx += ix * stride
-          stride *= shape[i]
-        else
-          idx = ix
-          stride = shape[i]
-        end
-      end
-      self[idx]
-    end
     # Rotate in the plane specified by axes.
     # @example
     #   p a = Cumo::Int32.new(2,2).seq
@@ -114,7 +66,7 @@ module Cumo
     #   # Cumo::Int32(view)#shape=[2,2]
     #   # [[2, 0],
     #   #  [3, 1]]
-    def rot90(k=1,axes=[0,1])
+    def rot90(k=1, axes=[0, 1])
       case k % 4
       when 0
         view
@@ -128,7 +80,7 @@ module Cumo
     end
     def to_i
-      if size==1
+      if size == 1
         self.extract_cpu.to_i
       else
         # convert to Int?
@@ -137,7 +89,7 @@ module Cumo
     end
     def to_f
-      if size==1
+      if size == 1
         self.extract_cpu.to_f
       else
         # convert to DFloat?
@@ -146,7 +98,7 @@ module Cumo
     end
     def to_c
-      if size==1
+      if size == 1
         Complex(self.extract_cpu)
       else
         # convert to DComplex?
@@ -163,7 +115,7 @@ module Cumo
       case a
       when NArray
         (a.ndim == 0) ? a[:new] : a
-      when Numeric,Range
+      when Numeric, Range
         self[a]
       else
         cast(a)
@@ -201,7 +153,7 @@ module Cumo
         end
         a << b if !b.empty?
       end
-      if a.size==1
+      if a.size == 1
         self.cast(a[0])
       else
         self.cast(a)
@@ -237,18 +189,18 @@ module Cumo
     def each_over_axis(axis=0)
       unless block_given?
-        return to_enum(:each_over_axis,axis)
+        return to_enum(:each_over_axis, axis)
       end
       if ndim == 0
         if axis != 0
-          raise ArgumentError,"axis=#{axis} is invalid"
+          raise ArgumentError, "axis=#{axis} is invalid"
         end
         niter = 1
       else
         axis = check_axis(axis)
         niter = shape[axis]
       end
-      idx = [true]*ndim
+      idx = [true] * ndim
       niter.times do |i|
         idx[axis] = i
         yield(self[*idx])
@@ -275,15 +227,15 @@ module Cumo
     #   p a.append([7, 8, 9], axis:0)
     #   # in `append': dimension mismatch (Cumo::NArray::DimensionError)
-    def append(other,axis:nil)
+    def append(other, axis:nil)
       other = self.class.cast(other)
       if axis
         if ndim != other.ndim
           raise DimensionError, "dimension mismatch"
         end
-        return concatenate(other,axis:axis)
+        return concatenate(other, axis:axis)
       else
-        a = self.class.zeros(size+other.size)
+        a = self.class.zeros(size + other.size)
         a[0...size] = self[true]
         a[size..-1] = other[true]
         return a
@@ -310,11 +262,11 @@ module Cumo
     #   # Cumo::DFloat(view)#shape=[9]
     #   # [1, 3, 5, 7, 8, 9, 10, 11, 12]
-    def delete(indice,axis=nil)
+    def delete(indice, axis=nil)
       if axis
         bit = Bit.ones(shape[axis])
         bit[indice] = 0
-        idx = [true]*ndim
+        idx = [true] * ndim
         idx[axis] = bit.where
         return self[*idx].copy
       else
@@ -395,14 +347,14 @@ module Cumo
     #   # [[0, 999, 1, 2, 999, 3],
     #   #  [4, 999, 5, 6, 999, 7]]
-    def insert(indice,values,axis:nil)
+    def insert(indice, values, axis:nil)
       if axis
         values = self.class.asarray(values)
         nd = values.ndim
-        midx = [:new]*(ndim-nd) + [true]*nd
+        midx = [:new] * (ndim - nd) + [true] * nd
         case indice
         when Numeric
-          midx[-nd-1] = true
+          midx[-nd - 1] = true
           midx[axis] = :new
         end
         values = values[*midx]
@@ -412,27 +364,27 @@ module Cumo
       idx = Int64.asarray(indice)
       nidx = idx.size
       if nidx == 1
-        nidx = values.shape[axis||0]
+        nidx = values.shape[axis || 0]
         idx = idx + Int64.new(nidx).seq
       else
         sidx = idx.sort_index
         idx[sidx] += Int64.new(nidx).seq
       end
       if axis
-        bit = Bit.ones(shape[axis]+nidx)
+        bit = Bit.ones(shape[axis] + nidx)
         bit[idx] = 0
         new_shape = shape
         new_shape[axis] += nidx
         a = self.class.zeros(new_shape)
-        mdidx = [true]*ndim
+        mdidx = [true] * ndim
         mdidx[axis] = bit.where
         a[*mdidx] = self
         mdidx[axis] = idx
         a[*mdidx] = values
       else
-        bit = Bit.ones(size+nidx)
+        bit = Bit.ones(size + nidx)
         bit[idx] = 0
-        a = self.class.zeros(size+nidx)
+        a = self.class.zeros(size + nidx)
         a[bit.where] = self.flatten
         a[idx] = values
       end
@@ -461,8 +413,8 @@ module Cumo
     #   # [[1, 2, 5],
     #   #  [3, 4, 6]]
-    def concatenate(arrays,axis:0)
-      klass = (self==NArray) ? NArray.array_type(arrays) : self
+    def concatenate(arrays, axis:0)
+      klass = (self == NArray) ? NArray.array_type(arrays) : self
       nd = 0
       arrays = arrays.map do |a|
         case a
@@ -473,7 +425,7 @@ module Cumo
         when Array
           a = klass.cast(a)
         else
-          raise TypeError,"not Cumo::NArray: #{a.inspect[0..48]}"
+          raise TypeError, "not Cumo::NArray: #{a.inspect[0..48]}"
         end
         if a.ndim > nd
           nd = a.ndim
@@ -484,31 +436,31 @@ module Cumo
         axis += nd
       end
       if axis < 0 || axis >= nd
-        raise ArgumentError,"axis is out of range"
+        raise ArgumentError, "axis is out of range"
       end
       new_shape = nil
       sum_size = 0
       arrays.each do |a|
         a_shape = a.shape
         if nd != a_shape.size
-          a_shape = [1]*(nd-a_shape.size) + a_shape
+          a_shape = [1] * (nd - a_shape.size) + a_shape
         end
         sum_size += a_shape.delete_at(axis)
         if new_shape
           if new_shape != a_shape
-            raise ShapeError,"shape mismatch"
+            raise ShapeError, "shape mismatch"
           end
         else
           new_shape = a_shape
         end
       end
-      new_shape.insert(axis,sum_size)
+      new_shape.insert(axis, sum_size)
       result = klass.zeros(*new_shape)
       lst = 0
       refs = [true] * nd
       arrays.each do |a|
         fst = lst
-        lst = fst + (a.shape[axis-nd]||1)
+        lst = fst + (a.shape[axis - nd] || 1)
         refs[axis] = fst...lst
         result[*refs] = a
       end
@@ -539,7 +491,7 @@ module Cumo
       arys = arrays.map do |a|
         _atleast_2d(cast(a))
       end
-      concatenate(arys,axis:0)
+      concatenate(arys, axis:0)
     end
     # Stack arrays horizontally (column wise).
@@ -559,7 +511,7 @@ module Cumo
     #   #  [3, 4]]
     def hstack(arrays)
-      klass = (self==NArray) ? NArray.array_type(arrays) : self
+      klass = (self == NArray) ? NArray.array_type(arrays) : self
       nd = 0
       arys = arrays.map do |a|
         a = klass.cast(a)
@@ -567,7 +519,7 @@ module Cumo
         a
       end
       dim = (nd >= 2) ? 1 : 0
-      concatenate(arys,axis:dim)
+      concatenate(arys, axis:dim)
     end
     # Stack arrays in depth wise (along third axis).
@@ -592,7 +544,7 @@ module Cumo
       arys = arrays.map do |a|
         _atleast_3d(cast(a))
       end
-      concatenate(arys,axis:2)
+      concatenate(arys, axis:2)
     end
     # Stack 1-d arrays into columns of a 2-d array.
@@ -609,20 +561,20 @@ module Cumo
       arys = arrays.map do |a|
         a = cast(a)
         case a.ndim
-        when 0; a[:new,:new]
-        when 1; a[true,:new]
+        when 0; a[:new, :new]
+        when 1; a[true, :new]
         else; a
         end
       end
-      concatenate(arys,axis:1)
+      concatenate(arys, axis:1)
     end
     private
     # Return an narray with at least two dimension.
     def _atleast_2d(a)
       case a.ndim
-      when 0; a[:new,:new]
-      when 1; a[:new,true]
+      when 0; a[:new, :new]
+      when 1; a[:new, true]
       else;   a
       end
     end
@@ -630,9 +582,9 @@ module Cumo
     # Return an narray with at least three dimension.
     def _atleast_3d(a)
       case a.ndim
-      when 0; a[:new,:new,:new]
-      when 1; a[:new,true,:new]
-      when 2; a[true,true,:new]
+      when 0; a[:new, :new, :new]
+      when 1; a[:new, true, :new]
+      when 2; a[true, true, :new]
       else;   a
       end
     end
@@ -660,7 +612,7 @@ module Cumo
     #   # [[1, 2, 5],
     #   #  [3, 4, 6]]
-    def concatenate(*arrays,axis:0)
+    def concatenate(*arrays, axis:0)
       axis = check_axis(axis)
       self_shape = shape
       self_shape.delete_at(axis)
@@ -674,19 +626,19 @@ module Cumo
         when Array
           a = self.class.cast(a)
         else
-          raise TypeError,"not Cumo::NArray: #{a.inspect[0..48]}"
+          raise TypeError, "not Cumo::NArray: #{a.inspect[0..48]}"
         end
         if a.ndim > ndim
-          raise ShapeError,"dimension mismatch"
+          raise ShapeError, "dimension mismatch"
         end
         a_shape = a.shape
-        sum_size += a_shape.delete_at(axis-ndim) || 1
+        sum_size += a_shape.delete_at(axis - ndim) || 1
         if self_shape != a_shape
-          raise ShapeError,"shape mismatch"
+          raise ShapeError, "shape mismatch"
         end
         a
       end
-      self_shape.insert(axis,sum_size)
+      self_shape.insert(axis, sum_size)
       result = self.class.zeros(*self_shape)
       lst = shape[axis]
       refs = [true] * ndim
@@ -694,7 +646,7 @@ module Cumo
       result[*refs] = self
       arrays.each do |a|
         fst = lst
-        lst = fst + (a.shape[axis-ndim] || 1)
+        lst = fst + (a.shape[axis - ndim] || 1)
         refs[axis] = fst...lst
         result[*refs] = a
       end
@@ -735,7 +687,7 @@ module Cumo
       case indices_or_sections
       when Integer
         div_axis, mod_axis = size_axis.divmod(indices_or_sections)
-        refs = [true]*ndim
+        refs = [true] * ndim
         beg_idx = 0
         mod_axis.times.map do |i|
           end_idx = beg_idx + div_axis + 1
@@ -743,16 +695,16 @@ module Cumo
           beg_idx = end_idx
           self[*refs]
         end +
-        (indices_or_sections-mod_axis).times.map do |i|
+        (indices_or_sections - mod_axis).times.map do |i|
           end_idx = beg_idx + div_axis
           refs[axis] = beg_idx ... end_idx
           beg_idx = end_idx
           self[*refs]
         end
       when NArray
-        split(indices_or_sections.to_a,axis:axis)
+        split(indices_or_sections.to_a, axis:axis)
       when Array
-        refs = [true]*ndim
+        refs = [true] * ndim
         fst = 0
         (indices_or_sections + [size_axis]).map do |lst|
           lst = size_axis if lst > size_axis
@@ -761,7 +713,7 @@ module Cumo
           self[*refs]
         end
       else
-        raise TypeError,"argument must be Integer or Array"
+        raise TypeError, "argument must be Integer or Array"
       end
     end
@@ -859,8 +811,8 @@ module Cumo
     def tile(*arg)
       arg.each do |i|
-        if !i.kind_of?(Integer) || i<1
-          raise ArgumentError,"argument should be positive integer"
+        if !i.kind_of?(Integer) || i < 1
+          raise ArgumentError, "argument should be positive integer"
         end
       end
       ns = arg.size
@@ -869,26 +821,26 @@ module Cumo
       new_shp = []
       src_shp = []
       res_shp = []
-      (nd-ns).times do
+      (nd - ns).times do
         new_shp << 1
         new_shp << (n = shp.shift)
         src_shp << :new
         src_shp << true
         res_shp << n
       end
-      (ns-nd).times do
+      (ns - nd).times do
         new_shp << (m = arg.shift)
         new_shp << 1
         src_shp << :new
         src_shp << :new
         res_shp << m
       end
-      [nd,ns].min.times do
+      [nd, ns].min.times do
         new_shp << (m = arg.shift)
         new_shp << (n = shp.shift)
         src_shp << :new
         src_shp << true
-        res_shp << n*m
+        res_shp << n * m
       end
       self.class.new(*new_shp).store(self[*src_shp]).reshape(*res_shp)
     end
@@ -918,7 +870,7 @@ module Cumo
     #   #  [3, 4],
     #   #  [3, 4]]
-    def repeat(arg,axis:nil)
+    def repeat(arg, axis:nil)
       case axis
       when Integer
         axis = check_axis(axis)
@@ -927,25 +879,25 @@ module Cumo
         c = self.flatten
         axis = 0
       else
-        raise ArgumentError,"invalid axis"
+        raise ArgumentError, "invalid axis"
       end
       case arg
       when Integer
-        if !arg.kind_of?(Integer) || arg<1
-          raise ArgumentError,"argument should be positive integer"
+        if !arg.kind_of?(Integer) || arg < 1
+          raise ArgumentError, "argument should be positive integer"
         end
-        idx = c.shape[axis].times.map{|i| [i]*arg}.flatten
+        idx = c.shape[axis].times.map { |i| [i] * arg }.flatten
       else
         arg = arg.to_a
         if arg.size != c.shape[axis]
-          raise ArgumentError,"repeat size shoud be equal to size along axis"
+          raise ArgumentError, "repeat size shoud be equal to size along axis"
         end
         arg.each do |i|
-          if !i.kind_of?(Integer) || i<0
-            raise ArgumentError,"argument should be non-negative integer"
+          if !i.kind_of?(Integer) || i < 0
+            raise ArgumentError, "argument should be non-negative integer"
           end
         end
-        idx = arg.each_with_index.map{|a,i| [i]*a}.flatten
+        idx = arg.each_with_index.map { |a, i| [i] * a }.flatten
       end
       ref = [true] * c.ndim
       ref[axis] = idx
@@ -980,27 +932,27 @@ module Cumo
     #   # Cumo::DFloat#shape=[1,4]
     #   # [[-1, 2, 0, -2]]
-    def diff(n=1,axis:-1)
+    def diff(n=1, axis:-1)
       axis = check_axis(axis)
       if n < 0 || n >= shape[axis]
-        raise ShapeError,"n=#{n} is invalid for shape[#{axis}]=#{shape[axis]}"
+        raise ShapeError, "n=#{n} is invalid for shape[#{axis}]=#{shape[axis]}"
       end
       # calculate polynomial coefficient
-      c = self.class[-1,1]
+      c = self.class[-1, 1]
       2.upto(n) do |i|
-        x = self.class.zeros(i+1)
+        x = self.class.zeros(i + 1)
         x[0..-2] = c
-        y = self.class.zeros(i+1)
+        y = self.class.zeros(i + 1)
         y[1..-1] = c
         c = y - x
       end
-      s = [true]*ndim
+      s = [true] * ndim
       s[axis] = n..-1
       result = self[*s].dup
       sum = result.inplace
-      (n-1).downto(0) do |i|
-        s = [true]*ndim
-        s[axis] = i..-n-1+i
+      (n - 1).downto(0) do |i|
+        s = [true] * ndim
+        s[axis] = i..-n - 1 + i
         sum + self[*s] * c[i] # inplace addition
       end
       return result
@@ -1020,11 +972,11 @@ module Cumo
         raise NArray::ShapeError, "must be >= 2-dimensional array"
       end
       if contiguous?
-        *shp,m,n = shape
-        idx = tril_indices(k-1)
-        reshape!(*shp,m*n)
-        self[false,idx] = 0
-        reshape!(*shp,m,n)
+        *shp, m, n = shape
+        idx = tril_indices(k - 1)
+        reshape!(*shp, m * n)
+        self[false, idx] = 0
+        reshape!(*shp, m, n)
       else
         store(triu(k))
       end
@@ -1035,15 +987,15 @@ module Cumo
       if ndim < 2
         raise NArray::ShapeError, "must be >= 2-dimensional array"
       end
-      m,n = shape[-2..-1]
-      NArray.triu_indices(m,n,k)
+      m, n = shape[-2..-1]
+      NArray.triu_indices(m, n, k)
     end
     # Return the indices for the uppler-triangle on and above the k-th diagonal.
-    def self.triu_indices(m,n,k=0)
-      x = Cumo::Int64.new(m,1).seq + k
-      y = Cumo::Int64.new(1,n).seq
-      (x<=y).where
+    def self.triu_indices(m, n, k=0)
+      x = Cumo::Int64.new(m, 1).seq + k
+      y = Cumo::Int64.new(1, n).seq
+      (x <= y).where
     end
     # Lower triangular matrix.
@@ -1059,11 +1011,11 @@ module Cumo
         raise NArray::ShapeError, "must be >= 2-dimensional array"
       end
       if contiguous?
-        idx = triu_indices(k+1)
-        *shp,m,n = shape
-        reshape!(*shp,m*n)
-        self[false,idx] = 0
-        reshape!(*shp,m,n)
+        idx = triu_indices(k + 1)
+        *shp, m, n = shape
+        reshape!(*shp, m * n)
+        self[false, idx] = 0
+        reshape!(*shp, m, n)
       else
         store(tril(k))
       end
@@ -1074,15 +1026,15 @@ module Cumo
       if ndim < 2
         raise NArray::ShapeError, "must be >= 2-dimensional array"
       end
-      m,n = shape[-2..-1]
-      NArray.tril_indices(m,n,k)
+      m, n = shape[-2..-1]
+      NArray.tril_indices(m, n, k)
     end
     # Return the indices for the lower-triangle on and below the k-th diagonal.
-    def self.tril_indices(m,n,k=0)
-      x = Cumo::Int64.new(m,1).seq + k
-      y = Cumo::Int64.new(1,n).seq
-      (x>=y).where
+    def self.tril_indices(m, n, k=0)
+      x = Cumo::Int64.new(m, 1).seq + k
+      y = Cumo::Int64.new(1, n).seq
+      (x >= y).where
     end
     # Return the k-th diagonal indices.
@@ -1090,22 +1042,22 @@ module Cumo
       if ndim < 2
         raise NArray::ShapeError, "must be >= 2-dimensional array"
       end
-      m,n = shape[-2..-1]
-      NArray.diag_indices(m,n,k)
+      m, n = shape[-2..-1]
+      NArray.diag_indices(m, n, k)
     end
     # Return the k-th diagonal indices.
-    def self.diag_indices(m,n,k=0)
-      x = Cumo::Int64.new(m,1).seq + k
-      y = Cumo::Int64.new(1,n).seq
+    def self.diag_indices(m, n, k=0)
+      x = Cumo::Int64.new(m, 1).seq + k
+      y = Cumo::Int64.new(1, n).seq
       (x.eq y).where
     end
     # Return a matrix whose diagonal is constructed by self along the last axis.
     def diag(k=0)
-      *shp,n = shape
+      *shp, n = shape
       n += k.abs
-      a = self.class.zeros(*shp,n,n)
+      a = self.class.zeros(*shp, n, n)
       a.diagonal(k).store(self)
       a
     end
@@ -1120,8 +1072,8 @@ module Cumo
     # @param axis [Array] (optional, default=[-2,-1]) diagonal axis
     # @param nan [Bool] (optional, default=false) nan-aware algorithm, i.e., if true then it ignores nan.
-    def trace(offset=nil,axis=nil,nan:false)
-      diagonal(offset,axis).sum(nan:nan,axis:-1)
+    def trace(offset=nil, axis=nil, nan:false)
+      diagonal(offset, axis).sum(nan:nan, axis:-1)
     end
@@ -1164,20 +1116,20 @@ module Cumo
           when 0
             b.mulsum(self, axis:-2)
           when 1
-            self[true,:new].mulsum(b, axis:-2)
+            self[true, :new].mulsum(b, axis:-2)
           else
             unless @@warn_slow_dot
               nx = 200
               ns = 200000
-              am,an = shape[-2..-1]
-              bm,bn = b.shape[-2..-1]
+              am, an = shape[-2..-1]
+              bm, bn = b.shape[-2..-1]
               if am > nx && an > nx && bm > nx && bn > nx &&
                   size > ns && b.size > ns
                 @@warn_slow_dot = true
                 warn "\nwarning: matrix dot for #{t} is slow. Consider SFloat, DFloat, SComplex, or DComplex to use cuBLAS.\n\n"
               end
             end
-            self[false,:new].mulsum(b[false,:new,true,true], axis:-2)
+            self[false, :new].mulsum(b[false, :new, true, true], axis:-2)
           end
         end
       end
@@ -1217,17 +1169,17 @@ module Cumo
     def outer(b, axis:nil)
       b = NArray.cast(b)
       if axis.nil?
-        self[false,:new] * ((b.ndim==0) ? b : b[false,:new,true])
+        self[false, :new] * ((b.ndim == 0) ? b : b[false, :new, true])
       else
-        md,nd = [ndim,b.ndim].minmax
+        md, nd = [ndim, b.ndim].minmax
         axis = check_axis(axis) - nd
         if axis < -md
-          raise ArgumentError,"axis=#{axis} is out of range"
+          raise ArgumentError, "axis=#{axis} is out of range"
         end
-        adim = [true]*ndim
-        adim[axis+ndim+1,0] = :new
-        bdim = [true]*b.ndim
-        bdim[axis+b.ndim,0] = :new
+        adim = [true] * ndim
+        adim[axis + ndim + 1, 0] = :new
+        bdim = [true] * b.ndim
+        bdim[axis + b.ndim, 0] = :new
         self[*adim] * b[*bdim]
       end
     end
@@ -1259,9 +1211,9 @@ module Cumo
       ndb = b.ndim
       shpa = shape
       shpb = b.shape
-      adim = [:new]*(2*[ndb-nda,0].max) + [true,:new]*nda
-      bdim = [:new]*(2*[nda-ndb,0].max) + [:new,true]*ndb
-      shpr = (-[nda,ndb].max..-1).map{|i| (shpa[i]||1) * (shpb[i]||1)}
+      adim = [:new] * (2 * [ndb - nda, 0].max) + [true, :new] * nda
+      bdim = [:new] * (2 * [nda - ndb, 0].max) + [:new, true] * ndb
+      shpr = (-[nda, ndb].max..-1).map { |i| (shpa[i] || 1) * (shpb[i] || 1) }
       (self[*adim] * b[*bdim]).reshape(*shpr)
     end
@@ -1269,7 +1221,7 @@ module Cumo
     # under construction
     def cov(y=nil, ddof:1, fweights:nil, aweights:nil)
       if y
-        m = NArray.vstack([self,y])
+        m = NArray.vstack([self, y])
       else
         m = self
       end
@@ -1280,7 +1232,7 @@ module Cumo
       end
       if aweights
         a = aweights
-        w = w ? w*a : a
+        w = w ? w * a : a
       end
       if w
         w_sum = w.sum(axis:-1, keepdims:true)
@@ -1289,23 +1241,23 @@ module Cumo
         elsif aweights.nil?
           fact = w_sum - ddof
         else
-          wa_sum = (w*a).sum(axis:-1, keepdims:true)
+          wa_sum = (w * a).sum(axis:-1, keepdims:true)
           fact = w_sum - ddof * wa_sum / w_sum
         end
         if (fact <= 0).any?
-          raise StandardError,"Degrees of freedom <= 0 for slice"
+          raise StandardError, "Degrees of freedom <= 0 for slice"
         end
       else
         fact = m.shape[-1] - ddof
       end
       if w
-        m -= (m*w).sum(axis:-1, keepdims:true) / w_sum
-        mw = m*w
+        m -= (m * w).sum(axis:-1, keepdims:true) / w_sum
+        mw = m * w
       else
         m -= m.mean(axis:-1, keepdims:true)
         mw = m
       end
-      mt = (m.ndim < 2) ? m : m.swapaxes(-2,-1)
+      mt = (m.ndim < 2) ? m : m.swapaxes(-2, -1)
       mw.dot(mt.conj) / fact
     end
@@ -1313,15 +1265,15 @@ module Cumo
     # @!visibility private
     def check_axis(axis)
-      unless Integer===axis
-        raise ArgumentError,"axis=#{axis} must be Integer"
+      unless Integer === axis
+        raise ArgumentError, "axis=#{axis} must be Integer"
       end
       a = axis
       if a < 0
         a += ndim
       end
       if a < 0 || a >= ndim
-        raise ArgumentError,"axis=#{axis} is invalid"
+        raise ArgumentError, "axis=#{axis} is invalid"
       end
       a
     end