RubyGems - cumo - Versions diffs - 0.4.3 → 0.5.1 - Mend

cumo 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137)

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.rubocop.yml +15 -0
data/.rubocop_todo.yml +1252 -0
data/3rd_party/mkmf-cu/Gemfile +2 -0
data/3rd_party/mkmf-cu/Rakefile +2 -1
data/3rd_party/mkmf-cu/bin/mkmf-cu-nvcc +2 -0
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +43 -7
data/3rd_party/mkmf-cu/lib/mkmf-cu/nvcc.rb +51 -45
data/3rd_party/mkmf-cu/lib/mkmf-cu.rb +2 -0
data/3rd_party/mkmf-cu/mkmf-cu.gemspec +3 -1
data/3rd_party/mkmf-cu/test/test_mkmf-cu.rb +5 -3
data/CHANGELOG.md +85 -0
data/Dockerfile +34 -0
data/Gemfile +6 -1
data/README.md +2 -10
data/Rakefile +8 -11
data/bench/broadcast_fp32.rb +28 -26
data/bench/cumo_bench.rb +18 -16
data/bench/numo_bench.rb +18 -16
data/bench/reduction_fp32.rb +14 -12
data/bin/console +1 -0
data/cumo.gemspec +6 -9
data/docker-build.sh +4 -0
data/docker-launch.sh +4 -0
data/docs/src-tree.md +1 -1
data/ext/cumo/cuda/cudnn.c +2 -2
data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
data/ext/cumo/cuda/driver.c +8 -0
data/ext/cumo/cumo.c +7 -3
data/ext/cumo/depend.erb +15 -13
data/ext/cumo/extconf.rb +33 -47
data/ext/cumo/include/cumo/cuda/cudnn.h +3 -1
data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +13 -6
data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
data/ext/cumo/include/cumo/intern.h +1 -0
data/ext/cumo/include/cumo/narray.h +13 -1
data/ext/cumo/include/cumo/template.h +2 -4
data/ext/cumo/include/cumo/types/complex_macro.h +1 -1
data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
data/ext/cumo/include/cumo/types/float_macro.h +2 -2
data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
data/ext/cumo/include/cumo/types/xint_macro.h +3 -2
data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
data/ext/cumo/include/cumo.h +2 -2
data/ext/cumo/narray/array.c +8 -6
data/ext/cumo/narray/data.c +48 -28
data/ext/cumo/narray/gen/cogen.rb +8 -7
data/ext/cumo/narray/gen/cogen_kernel.rb +8 -7
data/ext/cumo/narray/gen/def/bit.rb +3 -1
data/ext/cumo/narray/gen/def/dcomplex.rb +2 -0
data/ext/cumo/narray/gen/def/dfloat.rb +2 -0
data/ext/cumo/narray/gen/def/int16.rb +2 -0
data/ext/cumo/narray/gen/def/int32.rb +2 -0
data/ext/cumo/narray/gen/def/int64.rb +2 -0
data/ext/cumo/narray/gen/def/int8.rb +2 -0
data/ext/cumo/narray/gen/def/robject.rb +2 -0
data/ext/cumo/narray/gen/def/scomplex.rb +2 -0
data/ext/cumo/narray/gen/def/sfloat.rb +2 -0
data/ext/cumo/narray/gen/def/uint16.rb +2 -0
data/ext/cumo/narray/gen/def/uint32.rb +2 -0
data/ext/cumo/narray/gen/def/uint64.rb +2 -0
data/ext/cumo/narray/gen/def/uint8.rb +2 -0
data/ext/cumo/narray/gen/erbln.rb +9 -7
data/ext/cumo/narray/gen/erbpp2.rb +26 -24
data/ext/cumo/narray/gen/narray_def.rb +13 -11
data/ext/cumo/narray/gen/spec.rb +58 -55
data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
data/ext/cumo/narray/gen/tmpl/alloc_func.c +1 -1
data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
data/ext/cumo/narray/gen/tmpl/at.c +34 -0
data/ext/cumo/narray/gen/tmpl/batch_norm.c +5 -2
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -3
data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
data/ext/cumo/narray/gen/tmpl/conv.c +1 -1
data/ext/cumo/narray/gen/tmpl/conv_grad_w.c +3 -1
data/ext/cumo/narray/gen/tmpl/conv_transpose.c +1 -1
data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
data/ext/cumo/narray/gen/tmpl/each.c +4 -2
data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +5 -2
data/ext/cumo/narray/gen/tmpl/init_class.c +1 -0
data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
data/ext/cumo/narray/gen/tmpl/median.c +2 -2
data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
data/ext/cumo/narray/gen/tmpl/pooling_backward.c +1 -1
data/ext/cumo/narray/gen/tmpl/pooling_forward.c +1 -1
data/ext/cumo/narray/gen/tmpl/qsort.c +1 -5
data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
data/ext/cumo/narray/gen/tmpl/sort.c +3 -3
data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
data/ext/cumo/narray/gen/tmpl_bit/binary.c +42 -14
data/ext/cumo/narray/gen/tmpl_bit/bit_count.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/bit_reduce.c +5 -0
data/ext/cumo/narray/gen/tmpl_bit/mask.c +27 -7
data/ext/cumo/narray/gen/tmpl_bit/store_bit.c +21 -7
data/ext/cumo/narray/gen/tmpl_bit/unary.c +21 -7
data/ext/cumo/narray/index.c +244 -40
data/ext/cumo/narray/index_kernel.cu +84 -0
data/ext/cumo/narray/narray.c +57 -19
data/ext/cumo/narray/ndloop.c +1 -1
data/ext/cumo/narray/struct.c +1 -1
data/lib/cumo/cuda/compile_error.rb +1 -1
data/lib/cumo/cuda/compiler.rb +23 -22
data/lib/cumo/cuda/cudnn.rb +1 -1
data/lib/cumo/cuda/device.rb +1 -1
data/lib/cumo/cuda/link_state.rb +2 -2
data/lib/cumo/cuda/module.rb +1 -2
data/lib/cumo/cuda/nvrtc_program.rb +3 -2
data/lib/cumo/cuda.rb +2 -0
data/lib/cumo/linalg.rb +2 -0
data/lib/cumo/narray/extra.rb +297 -341
data/lib/cumo/narray.rb +2 -0
data/lib/cumo.rb +3 -1
data/test/bit_test.rb +157 -0
data/test/cuda/compiler_test.rb +69 -0
data/test/cuda/device_test.rb +31 -0
data/test/cuda/memory_pool_test.rb +45 -0
data/test/cuda/nvrtc_test.rb +51 -0
data/test/cuda/runtime_test.rb +28 -0
data/test/cudnn_test.rb +498 -0
data/test/cumo_test.rb +27 -0
data/test/narray_test.rb +745 -0
data/test/ractor_test.rb +52 -0
data/test/test_helper.rb +31 -0
metadata +34 -54
data/.travis.yml +0 -5
data/numo-narray-version +0 -1

data/ext/cumo/narray/gen/spec.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 def_id "cast"
 def_id "eq"
 def_id "ne"
@@ -14,8 +16,8 @@ if is_float
   def_id "copysign"
 end
 if is_int
-  def_id "<<","left_shift"
-  def_id ">>","right_shift"
+  def_id "<<", "left_shift"
+  def_id ">>", "right_shift"
 end
 if is_comparable && !is_object
   def_id "gt"
@@ -42,13 +44,13 @@ if is_object
   def_id "nan?"
   def_id "infinite?"
   def_id "finite?"
-  def_id "==","eq"
-  def_id "!=","ne"
-  def_id ">" ,"gt"
-  def_id ">=","ge"
-  def_id "<" ,"lt"
-  def_id "<=","le"
-  def_id "<=>","ufo"
+  def_id "==", "eq"
+  def_id "!=", "ne"
+  def_id ">" , "gt"
+  def_id ">=", "ge"
+  def_id "<" , "lt"
+  def_id "<=", "le"
+  def_id "<=>", "ufo"
 end
 if (is_float || is_complex) && !is_object
   def_id "gemm"
@@ -119,18 +121,18 @@ def_method "store" do
   store_numeric
   store_from "Bit"
   if is_complex
-    store_from "DComplex","cumo_dcomplex","m_from_dcomplex"
-    store_from "SComplex","cumo_scomplex","m_from_scomplex"
+    store_from "DComplex", "cumo_dcomplex", "m_from_dcomplex"
+    store_from "SComplex", "cumo_scomplex", "m_from_scomplex"
   end
-  store_from "DFloat","double",   "m_from_real"
-  store_from "SFloat","float",    "m_from_real"
+  store_from "DFloat", "double",   "m_from_real"
+  store_from "SFloat", "float",    "m_from_real"
   store_from "Int64", "int64_t",  "m_from_int64"
   store_from "Int32", "int32_t",  "m_from_int32"
   store_from "Int16", "int16_t",  "m_from_sint"
   store_from "Int8",  "int8_t",   "m_from_sint"
-  store_from "UInt64","u_int64_t","m_from_uint64"
-  store_from "UInt32","u_int32_t","m_from_uint32"
-  store_from "UInt16","u_int16_t","m_from_sint"
+  store_from "UInt64", "u_int64_t", "m_from_uint64"
+  store_from "UInt32", "u_int32_t", "m_from_uint32"
+  store_from "UInt16", "u_int16_t", "m_from_sint"
   store_from "UInt8", "u_int8_t", "m_from_sint"
   store_from "RObject", "VALUE",  "m_num_to_data"
   store_array
@@ -144,6 +146,7 @@ def_singleton_method "cast"
 def_method "aref", op:"[]"
 def_method "aref_cpu"
 def_method "aset", op:"[]="
+def_method "at"
 def_method "coerce_cast"
 def_method "to_a"
@@ -167,15 +170,15 @@ if is_bit
   binary "xor", "^"
   binary "eq"
   bit_count "count_true"
-  def_alias "count_1","count_true"
-  def_alias "count","count_true"
+  def_alias "count_1", "count_true"
+  def_alias "count", "count_true"
   bit_count "count_false"
-  def_alias "count_0","count_false"
+  def_alias "count_0", "count_false"
   bit_count_cpu "count_true_cpu"
-  def_alias "count_1_cpu","count_true_cpu"
-  def_alias "count_cpu","count_true_cpu"
+  def_alias "count_1_cpu", "count_true_cpu"
+  def_alias "count_cpu", "count_true_cpu"
   bit_count_cpu "count_false_cpu"
-  def_alias "count_0_cpu","count_false_cpu"
+  def_alias "count_0_cpu", "count_false_cpu"
   bit_reduce "all?", 1
   bit_reduce "any?", 0
   def_method "none?", "none_p"
@@ -215,17 +218,17 @@ if is_complex
   unary2 "real", "rtype", "cRT"
   unary2 "imag", "rtype", "cRT"
   unary2 "arg",  "rtype", "cRT"
-  def_alias "angle","arg"
+  def_alias "angle", "arg"
   set2 "set_imag", "rtype", "cRT"
   set2 "set_real", "rtype", "cRT"
-  def_alias "imag=","set_imag"
-  def_alias "real=","set_real"
+  def_alias "imag=", "set_imag"
+  def_alias "real=", "set_real"
 else
   def_alias "conj", "view"
   def_alias "im", "view"
 end
-def_alias "conjugate","conj"
+def_alias "conjugate", "conj"
 # base_cond
@@ -278,9 +281,9 @@ if is_comparable
   cond_binary "lt"
   cond_binary "le"
   def_alias ">", "gt"
-  def_alias ">=","ge"
+  def_alias ">=", "ge"
   def_alias "<", "lt"
-  def_alias "<=","le"
+  def_alias "<=", "le"
   def_method "clip"
 end
@@ -296,32 +299,32 @@ end
 if is_int
   if is_unsigned
-    accum "sum","u_int64_t","cumo_cUInt64"
-    accum "prod","u_int64_t","cumo_cUInt64"
+    accum "sum", "u_int64_t", "cumo_cUInt64"
+    accum "prod", "u_int64_t", "cumo_cUInt64"
   else
-    accum "sum","int64_t","cumo_cInt64"
-    accum "prod","int64_t","cumo_cInt64"
+    accum "sum", "int64_t", "cumo_cInt64"
+    accum "prod", "int64_t", "cumo_cInt64"
   end
 else
-  accum "sum","dtype","cT"
-  accum "prod","dtype","cT"
+  accum "sum", "dtype", "cT"
+  accum "prod", "dtype", "cT"
 end
 if is_double_precision
-  accum "kahan_sum","dtype","cT"
+  accum "kahan_sum", "dtype", "cT"
 end
 if is_float
-  accum "mean","dtype","cT"
-  accum "stddev","rtype","cRT"
-  accum "var","rtype","cRT"
-  accum "rms","rtype","cRT"
+  accum "mean", "dtype", "cT"
+  accum "stddev", "rtype", "cRT"
+  accum "var", "rtype", "cRT"
+  accum "rms", "rtype", "cRT"
 end
 if is_comparable
-  accum "min","dtype","cT"
-  accum "max","dtype","cT"
-  accum "ptp","dtype","cT"
+  accum "min", "dtype", "cT"
+  accum "max", "dtype", "cT"
+  accum "ptp", "dtype", "cT"
   accum_index "max_index"
   accum_index "min_index"
   def_method "minmax"
@@ -333,8 +336,8 @@ if is_int && !is_object
   def_method "bincount"
 end
-cum "cumsum","add"
-cum "cumprod","mul"
+cum "cumsum", "add"
+cum "cumprod", "mul"
 # dot
 accum_binary "mulsum"
@@ -377,17 +380,17 @@ def_method "poly"
 if is_comparable && !is_object
   if is_float
-    qsort type_name,"dtype","*(dtype*)","_prnan"
-    qsort type_name,"dtype","*(dtype*)","_ignan"
+    qsort type_name, "dtype", "*(dtype*)", "_prnan"
+    qsort type_name, "dtype", "*(dtype*)", "_ignan"
   else
-    qsort type_name,"dtype","*(dtype*)"
+    qsort type_name, "dtype", "*(dtype*)"
   end
   def_method "sort"
   if is_float
-    qsort type_name+"_index","dtype*","**(dtype**)","_prnan"
-    qsort type_name+"_index","dtype*","**(dtype**)","_ignan"
+    qsort type_name + "_index", "dtype*", "**(dtype**)", "_prnan"
+    qsort type_name + "_index", "dtype*", "**(dtype**)", "_ignan"
   else
-    qsort type_name+"_index","dtype*","**(dtype**)"
+    qsort type_name + "_index", "dtype*", "**(dtype**)"
   end
   def_method "sort_index"
   def_method "median"
@@ -407,7 +410,7 @@ def_module do
   set ns_var: "cT"
   set class_name: cn
   set name: "#{nm}_math"
-  set full_module_name: fn+"::NMath"
+  set full_module_name: fn + "::NMath"
   set module_name: "Math"
   set module_var: "mTM"
@@ -433,14 +436,14 @@ def_module do
   math "atanh"
   math "sinc"
   if !is_c
-    math "atan2",2
-    math "hypot",2
+    math "atan2", 2
+    math "hypot", 2
     math "erf"
     math "erfc"
     math "log1p"
     math "expm1"
-    math "ldexp",2
-    math "frexp",1,"frexp"
+    math "ldexp", 2
+    math "frexp", 1, "frexp"
   end
 end
 end

data/ext/cumo/narray/gen/tmpl/accum.c CHANGED Viewed

@@ -56,8 +56,8 @@ static void
 <% else %>
   @overload <%=name%>(axis:nil, keepdims:false)
 <% end %>
-  @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
-  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
+  @param [Numeric,Array,Range] axis  Performs <%=name%> along the axis.
+  @param [TrueClass] keepdims  If true, the reduced axes are left in the result array as dimensions with size one.
   @return [Cumo::<%=class_name%>] returns result of <%=name%>.
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/accum_binary.c CHANGED Viewed

@@ -91,7 +91,7 @@ static VALUE
   @overload <%=op_map%>(other, axis:nil, keepdims:false)
 <% end %>
   @param [Cumo::NArray,Numeric] other
-  @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
+  @param [Numeric,Array,Range] axis  Performs <%=name%> along the axis.
   @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
 <% if is_float %>
   @param [TrueClass] nan (keyword) If true, apply NaN-aware algorithm (avoid NaN if exists).

data/ext/cumo/narray/gen/tmpl/alloc_func.c CHANGED Viewed

@@ -85,7 +85,7 @@ static const rb_data_type_t <%=type_name%>_data_type = {
     {0, <%=type_name%>_free, <%=type_name%>_memsize,},
     &cumo_na_data_type,
     &<%=type_name%>_info,
-    0, // flags
+    RUBY_TYPED_FROZEN_SHAREABLE, // flags
 };
 <% end %>

data/ext/cumo/narray/gen/tmpl/aref.c CHANGED Viewed

@@ -21,35 +21,35 @@ static VALUE
   @example
       a = Cumo::DFloat.new(4,5).seq
-      => Cumo::DFloat#shape=[4,5]
-      [[0, 1, 2, 3, 4],
-       [5, 6, 7, 8, 9],
-       [10, 11, 12, 13, 14],
-       [15, 16, 17, 18, 19]]
+      # => Cumo::DFloat#shape=[4,5]
+      # [[0, 1, 2, 3, 4],
+      #  [5, 6, 7, 8, 9],
+      #  [10, 11, 12, 13, 14],
+      #  [15, 16, 17, 18, 19]]
       a[7]
-      => Cumo::DFloat#shape=[]
-      6.0
+      # => Cumo::DFloat#shape=[]
+      # 6.0
       a[1,1]
-      => Cumo::DFloat#shape=[]
-      6.0
+      # => Cumo::DFloat#shape=[]
+      # 6.0
       a[1..3,1]
-      => Cumo::DFloat#shape=[3]
-      [6, 11, 16]
+      # => Cumo::DFloat#shape=[3]
+      # [6, 11, 16]
       a[1,[1,3,4]]
-      => Cumo::DFloat#shape=[3]
-      [6, 8, 9]
+      # => Cumo::DFloat#shape=[3]
+      # [6, 8, 9]
       a[true,2].fill(99)
       a
-      => Cumo::DFloat#shape=[4,5]
-      [[0, 1, 99, 3, 4],
-       [5, 6, 99, 8, 9],
-       [10, 11, 99, 13, 14],
-       [15, 16, 99, 18, 19]]
+      # => Cumo::DFloat#shape=[4,5]
+      # [[0, 1, 99, 3, 4],
+      #  [5, 6, 99, 8, 9],
+      #  [10, 11, 99, 13, 14],
+      #  [15, 16, 99, 18, 19]]
  */
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)

data/ext/cumo/narray/gen/tmpl/aset.c CHANGED Viewed

@@ -10,31 +10,31 @@
   @example
       a = Cumo::DFloat.new(3,4).seq
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [4, 5, 6, 7],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [4, 5, 6, 7],
+      #  [8, 9, 10, 11]]
       a[1,2]=99
       a
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [4, 5, 99, 7],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [4, 5, 99, 7],
+      #  [8, 9, 10, 11]]
       a[1,[0,2]] = [101,102]
       a
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [101, 5, 102, 7],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [101, 5, 102, 7],
+      #  [8, 9, 10, 11]]
       a[1,true]=99
       a
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [99, 99, 99, 99],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [99, 99, 99, 99],
+      #  [8, 9, 10, 11]]
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/at.c ADDED Viewed

@@ -0,0 +1,34 @@
+/*
+  Multi-dimensional array indexing.
+  Same as [] for one-dimensional NArray.
+  Similar to numpy's tuple indexing, i.e., `a[[1,2,..],[3,4,..]]`
+  @overload at(*indices)
+  @param [Numeric,Range,etc] *indices  Multi-dimensional Index Arrays.
+  @return [Cumo::NArray::<%=class_name%>] one-dimensional NArray view.
+  @example
+      x = Cumo::DFloat.new(3,3,3).seq
+      => Cumo::DFloat#shape=[3,3,3]
+       [[[0, 1, 2],
+         [3, 4, 5],
+         [6, 7, 8]],
+        [[9, 10, 11],
+         [12, 13, 14],
+         [15, 16, 17]],
+        [[18, 19, 20],
+         [21, 22, 23],
+         [24, 25, 26]]]
+      x.at([0,1,2],[0,1,2],[-1,-2,-3])
+      => Cumo::DFloat(view)#shape=[3]
+       [2, 13, 24]
+ */
+static VALUE
+<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
+{
+    int result_nd;
+    size_t pos;
+    result_nd = cumo_na_get_result_dimension(self, argc, argv, sizeof(dtype), &pos);
+    return cumo_na_at_main(argc, argv, self, 0, result_nd, pos);
+}

data/ext/cumo/narray/gen/tmpl/batch_norm.c CHANGED Viewed

@@ -157,8 +157,11 @@ static VALUE
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
+    status = cudnnCreateTensorDescriptor(&bn_desc);
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
-    status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
+    status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
@@ -193,7 +196,7 @@ BATCH_NORM_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c CHANGED Viewed

@@ -134,8 +134,11 @@ static VALUE
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
+    status = cudnnCreateTensorDescriptor(&bn_desc);
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
-    status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
+    status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
@@ -178,11 +181,11 @@ BATCH_NORM_BACKWARD_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCudnnError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
 {
-    rb_raise(cumo_cuda_eCudnnError, "cuDNN is not available");
+    rb_raise(cumo_cuda_eCUDNNError, "cuDNN is not available");
 }
 #endif // CUDNN_FOUND

data/ext/cumo/narray/gen/tmpl/bincount.c CHANGED Viewed

@@ -116,22 +116,22 @@ static VALUE
     otherwise returns UInt32 or UInt64 depending on the size along last axis.
   @example
     Cumo::Int32[0..4].bincount
-    => Cumo::UInt32#shape=[5]
-       [1, 1, 1, 1, 1]
+    # => Cumo::UInt32#shape=[5]
+    #    [1, 1, 1, 1, 1]
     Cumo::Int32[0, 1, 1, 3, 2, 1, 7].bincount
-    => Cumo::UInt32#shape=[8]
-       [1, 3, 1, 1, 0, 0, 0, 1]
+    # => Cumo::UInt32#shape=[8]
+    #    [1, 3, 1, 1, 0, 0, 0, 1]
     x = Cumo::Int32[0, 1, 1, 3, 2, 1, 7, 23]
     x.bincount.size == x.max+1
-    => true
+    # => true
     w = Cumo::DFloat[0.3, 0.5, 0.2, 0.7, 1.0, -0.6]
     x = Cumo::Int32[0, 1, 1, 2, 2, 2]
     x.bincount(w)
-    => Cumo::DFloat#shape=[3]
-       [0.3, 0.7, 1.1]
+    # => Cumo::DFloat#shape=[3]
+    #    [0.3, 0.7, 1.1]
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/clip.c CHANGED Viewed

@@ -75,28 +75,24 @@ static void
   @example
       a = Cumo::Int32.new(10).seq
-      p a.clip(1,8)
-      # Cumo::Int32#shape=[10]
-      # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
-      p a
-      # Cumo::Int32#shape=[10]
+      # => Cumo::Int32#shape=[10]
       # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-      p a.inplace.clip(3,6)
-      # Cumo::Int32(view)#shape=[10]
-      # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
+      a.clip(1,8)
+      # => Cumo::Int32#shape=[10]
+      # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
-      p a
-      # Cumo::Int32#shape=[10]
+      a.inplace.clip(3,6)
+      a
+      # => Cumo::Int32#shape=[10]
       # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
-      p a = Cumo::Int32.new(10).seq
-      # Cumo::Int32#shape=[10]
+      b = Cumo::Int32.new(10).seq
+      # => Cumo::Int32#shape=[10]
       # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-      p a.clip([3,4,1,1,1,4,4,4,4,4], 8)
-      # Cumo::Int32#shape=[10]
+      b.clip([3,4,1,1,1,4,4,4,4,4], 8)
+      # => Cumo::Int32#shape=[10]
       # [3, 4, 2, 3, 4, 5, 6, 7, 8, 8]
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/conv.c CHANGED Viewed

@@ -206,7 +206,7 @@ CONV_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/conv_grad_w.c CHANGED Viewed

@@ -95,6 +95,7 @@ static VALUE
     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[0], ngy->shape[1]);
     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(sizet_w_shape[1], nx->shape[1]);
+#if !defined(NDEBUG)
     {
         // shape check of gy
         size_t *y_shape = ngy->shape;
@@ -105,6 +106,7 @@ static VALUE
                     x_shape[i + 2], sizet_w_shape[i + 2], int_stride[i], int_pad[i]));
         }
     }
+#endif
     x_cont = cumo_na_as_contiguous_array(x);
     gy_cont = cumo_na_as_contiguous_array(gy);
@@ -173,7 +175,7 @@ CONV_GRAD_W_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/conv_transpose.c CHANGED Viewed

@@ -234,7 +234,7 @@ CONV_TRANSPOSE_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/cum.c CHANGED Viewed

@@ -30,7 +30,7 @@ static void
 /*
   <%=name%> of self.
   @overload <%=name%>(axis:nil, nan:false)
-  @param [Numeric,Array,Range] axis  Affected dimensions.
+  @param [Numeric,Array,Range] axis  Performs <%=name%> along the axis.
   @param [TrueClass] nan  If true, apply NaN-aware algorithm (avoid NaN if exists).
   @return [Cumo::<%=class_name%>] <%=name%> of self.
 */

data/ext/cumo/narray/gen/tmpl/each.c CHANGED Viewed

@@ -34,8 +34,10 @@ static void
   passing that element as a parameter.
   @overload <%=name%>
   @return [Cumo::NArray] self
-  For a block {|x| ... }
-  @yield [x]  x is element of NArray.
+  For a block `{|x| ... }`,
+  @yieldparam [Numeric] x  an element of NArray.
+  @see #each_with_index
+  @see #map
 */
 static VALUE
 <%=c_func(0)%>(VALUE self)

data/ext/cumo/narray/gen/tmpl/each_with_index.c CHANGED Viewed

@@ -55,9 +55,12 @@ static void
   Invokes the given block once for each element of self,
   passing that element and indices along each axis as parameters.
   @overload <%=name%>
+  For a block `{|x,i,j,...| ... }`,
+  @yieldparam [Numeric] x  an element
+  @yieldparam [Integer] i,j,...  multidimensional indices
   @return [Cumo::NArray] self
-  For a block {|x,i,j,...| ... }
-  @yield [x,i,j,...]  x is an element, i,j,... are multidimensional indices.
+  @see #each
+  @see #map_with_index
 */
 static VALUE
 <%=c_func(0)%>(VALUE self)

data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c CHANGED Viewed

@@ -106,8 +106,11 @@ static VALUE
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
     if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
+    status = cudnnCreateTensorDescriptor(&bn_desc);
+    if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
-    status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
+    status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
     if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
@@ -139,7 +142,7 @@ FIXED_BATCH_NORM_ERROR:
 }
 #else // CUDNN_FOUND
-VALUE cumo_cuda_eCUDNNError;
+#include "cumo/cuda/cudnn.h"
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)

data/ext/cumo/narray/gen/tmpl/init_class.c CHANGED Viewed

@@ -14,6 +14,7 @@
     rb_hash_aset(hCast, rb_cArray,   cT);
     <% for x in upcast %>
     <%= x %><% end %>
+    rb_obj_freeze(hCast);
     <% @children.each do |m| %>
     <%= m.init_def %><% end %>