RubyGems - cumo - Versions diffs - 0.5.0 → 0.5.2 - Mend

cumo 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

checksums.yaml +4 -4
data/.rubocop_todo.yml +18 -37
data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +28 -21
data/CHANGELOG.md +28 -0
data/Dockerfile +34 -0
data/cumo.gemspec +1 -1
data/docker-build.sh +4 -0
data/docker-launch.sh +4 -0
data/docs/src-tree.md +1 -1
data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
data/ext/cumo/cuda/driver.c +8 -0
data/ext/cumo/depend.erb +1 -1
data/ext/cumo/extconf.rb +1 -1
data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +14 -7
data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
data/ext/cumo/include/cumo/narray.h +2 -0
data/ext/cumo/include/cumo/types/complex.h +2 -2
data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
data/ext/cumo/include/cumo.h +2 -2
data/ext/cumo/narray/array.c +5 -3
data/ext/cumo/narray/data.c +25 -26
data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
data/ext/cumo/narray/gen/tmpl/alloc_func.c +4 -1
data/ext/cumo/narray/gen/tmpl/allocate.c +1 -0
data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
data/ext/cumo/narray/gen/tmpl/batch_norm.c +4 -1
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +4 -1
data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +12 -12
data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
data/ext/cumo/narray/gen/tmpl/each.c +4 -2
data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +4 -1
data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +12 -12
data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
data/ext/cumo/narray/gen/tmpl/median.c +2 -2
data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
data/ext/cumo/narray/gen/tmpl/sort.c +2 -2
data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
data/ext/cumo/narray/gen/tmpl_bit/allocate.c +1 -0
data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
data/ext/cumo/narray/index.c +1 -1
data/ext/cumo/narray/narray.c +116 -21
data/lib/cumo/narray/extra.rb +160 -156
data/test/cuda/device_test.rb +2 -1
data/test/cudnn_test.rb +2 -2
data/test/narray_test.rb +80 -0
data/test/ractor_test.rb +5 -3
metadata +5 -2

data/ext/cumo/include/cumo/types/xint_macro_kernel.h CHANGED Viewed

@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
 /* --------- thrust ----------------- */
 #include "cumo/cuda/cumo_thrust.hpp"
-struct cumo_thrust_plus : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_plus
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
 };
-struct cumo_thrust_multiplies : public thrust::binary_function<dtype, dtype, dtype>
+struct cumo_thrust_multiplies
 {
+    using first_argument_type  = dtype;
+    using second_argument_type = dtype;
+    using result_type          = dtype;
     __host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
 };
-struct cumo_thrust_square : public thrust::unary_function<dtype, dtype>
+struct cumo_thrust_square
 {
+    using argument_type = dtype;
+    using result_type   = dtype;
     __host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
 };

data/ext/cumo/include/cumo.h CHANGED Viewed

@@ -10,8 +10,8 @@ extern "C" {
 #endif
 #endif
-#define CUMO_VERSION "0.5.0"
-#define CUMO_VERSION_CODE 50
+#define CUMO_VERSION "0.5.2"
+#define CUMO_VERSION_CODE 51
 bool cumo_compatible_mode_enabled_p();
 bool cumo_show_warning_enabled_p();

data/ext/cumo/narray/array.c CHANGED Viewed

@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
   @return [Cumo::NArray]
   @example
     Cumo::NArray.new_like([[1,2,3],[4,5,6]])
-    => Cumo::Int32#shape=[2,3](empty)
+    # => Cumo::Int32#shape=[2,3](empty)
     Cumo::DFloat.new_like([[1,2],[3,4]])
-    => Cumo::DFloat#shape=[2,2](empty)
+    # => Cumo::DFloat#shape=[2,2](empty)
     Cumo::NArray.new_like([1,2i,3])
-    => Cumo::DComplex#shape=[3](empty)
+    # => Cumo::DComplex#shape=[3](empty)
 */
 VALUE
 cumo_na_s_new_like(VALUE type, VALUE obj)

data/ext/cumo/narray/data.c CHANGED Viewed

@@ -195,21 +195,21 @@ check_axis(int axis, int ndim)
   @example
     x = Cumo::Int32[[1,2,3]]
-    p x.swapaxes(0,1)
-    # Cumo::Int32(view)#shape=[3,1]
+    x.swapaxes(0,1)
+    # => Cumo::Int32(view)#shape=[3,1]
     # [[1],
     #  [2],
     #  [3]]
-    p x = Cumo::Int32[[[0,1],[2,3]],[[4,5],[6,7]]]
-    # Cumo::Int32#shape=[2,2,2]
+    x = Cumo::Int32[[[0,1],[2,3]],[[4,5],[6,7]]]
+    # => Cumo::Int32#shape=[2,2,2]
     # [[[0, 1],
     #   [2, 3]],
     #  [[4, 5],
     #   [6, 7]]]
-    p x.swapaxes(0,2)
-    # Cumo::Int32(view)#shape=[2,2,2]
+    x.swapaxes(0,2)
+    # => Cumo::Int32(view)#shape=[2,2,2]
     # [[[0, 4],
     #   [2, 6]],
     #  [[1, 5],
@@ -510,7 +510,6 @@ cumo_na_flatten_dim(VALUE self, int sd)
     case CUMO_NARRAY_FILEMAP_T:
         stride = cumo_na_element_stride(self);
         for (i=sd+1; i--; ) {
-            //printf("data: i=%d shpae[i]=%ld stride=%ld\n",i,shape[i],stride);
             CUMO_SDX_SET_STRIDE(na2->stridx[i],stride);
             stride *= shape[i];
         }
@@ -533,12 +532,10 @@ cumo_na_flatten_dim(VALUE self, int sd)
                 CUMO_SDX_SET_INDEX(na2->stridx[i],idx2);
             } else {
                 na2->stridx[i] = na1->stridx[i];
-                //printf("view: i=%d stridx=%d\n",i,CUMO_SDX_GET_STRIDE(sdx));
             }
         }
         // flat dimension == last dimension
         if (RTEST(cumo_na_check_ladder(self,sd))) {
-        //if (0) {
             na2->stridx[sd] = na1->stridx[nd-1];
         } else {
             // set index
@@ -607,28 +604,30 @@ void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t
   @return [Cumo::NArray]  diagonal view of NArray.
   @example
     a = Cumo::DFloat.new(4,5).seq
-    => Cumo::DFloat#shape=[4,5]
-    [[0, 1, 2, 3, 4],
-     [5, 6, 7, 8, 9],
-     [10, 11, 12, 13, 14],
-     [15, 16, 17, 18, 19]]
+    # => Cumo::DFloat#shape=[4,5]
+    # [[0, 1, 2, 3, 4],
+    #  [5, 6, 7, 8, 9],
+    #  [10, 11, 12, 13, 14],
+    #  [15, 16, 17, 18, 19]]
     b = a.diagonal(1)
-    => Cumo::DFloat(view)#shape=[4]
-    [1, 7, 13, 19]
+    # => Cumo::DFloat(view)#shape=[4]
+    # [1, 7, 13, 19]
     b.store(0)
     a
-    => Cumo::DFloat#shape=[4,5]
-    [[0, 0, 2, 3, 4],
-     [5, 6, 0, 8, 9],
-     [10, 11, 12, 0, 14],
-     [15, 16, 17, 18, 0]]
+    # => Cumo::DFloat#shape=[4,5]
+    # [[0, 0, 2, 3, 4],
+    #  [5, 6, 0, 8, 9],
+    #  [10, 11, 12, 0, 14],
+    #  [15, 16, 17, 18, 0]]
     b.store([1,2,3,4])
     a
-    => Cumo::DFloat#shape=[4,5]
-    [[0, 1, 2, 3, 4],
-     [5, 6, 2, 8, 9],
-     [10, 11, 12, 3, 14],
-     [15, 16, 17, 18, 4]]
+    # => Cumo::DFloat#shape=[4,5]
+    # [[0, 1, 2, 3, 4],
+    #  [5, 6, 2, 8, 9],
+    #  [10, 11, 12, 3, 14],
+    #  [15, 16, 17, 18, 4]]
  */
 static VALUE
 cumo_na_diagonal(int argc, VALUE *argv, VALUE self)

data/ext/cumo/narray/gen/tmpl/accum.c CHANGED Viewed

@@ -56,8 +56,8 @@ static void
 <% else %>
   @overload <%=name%>(axis:nil, keepdims:false)
 <% end %>
-  @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
-  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
+  @param [Numeric,Array,Range] axis  Performs <%=name%> along the axis.
+  @param [TrueClass] keepdims  If true, the reduced axes are left in the result array as dimensions with size one.
   @return [Cumo::<%=class_name%>] returns result of <%=name%>.
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/accum_binary.c CHANGED Viewed

@@ -91,7 +91,7 @@ static VALUE
   @overload <%=op_map%>(other, axis:nil, keepdims:false)
 <% end %>
   @param [Cumo::NArray,Numeric] other
-  @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
+  @param [Numeric,Array,Range] axis  Performs <%=name%> along the axis.
   @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
 <% if is_float %>
   @param [TrueClass] nan (keyword) If true, apply NaN-aware algorithm (avoid NaN if exists).

data/ext/cumo/narray/gen/tmpl/alloc_func.c CHANGED Viewed

@@ -29,7 +29,9 @@ static void
     assert(na->base.type == CUMO_NARRAY_DATA_T);
     if (na->ptr != NULL) {
-        cumo_cuda_runtime_free(na->ptr);
+        if (na->owned) {
+            cumo_cuda_runtime_free(na->ptr);
+        }
         na->ptr = NULL;
     }
     if (na->base.size > 0) {
@@ -103,5 +105,6 @@ static VALUE
     na->base.shape = NULL;
     na->base.reduce = INT2FIX(0);
     na->ptr = NULL;
+    na->owned = FALSE;
     return TypedData_Wrap_Struct(klass, &<%=type_name%>_data_type, (void*)na);
 }

data/ext/cumo/narray/gen/tmpl/allocate.c CHANGED Viewed

@@ -22,6 +22,7 @@ static VALUE
             ptr = cumo_cuda_runtime_malloc(sizeof(dtype) * na->size);
             <% end %>
             CUMO_NA_DATA_PTR(na) = ptr;
+            CUMO_NA_DATA_OWNED(na) = TRUE;
         }
         break;
     case CUMO_NARRAY_VIEW_T:

data/ext/cumo/narray/gen/tmpl/aref.c CHANGED Viewed

@@ -21,35 +21,35 @@ static VALUE
   @example
       a = Cumo::DFloat.new(4,5).seq
-      => Cumo::DFloat#shape=[4,5]
-      [[0, 1, 2, 3, 4],
-       [5, 6, 7, 8, 9],
-       [10, 11, 12, 13, 14],
-       [15, 16, 17, 18, 19]]
+      # => Cumo::DFloat#shape=[4,5]
+      # [[0, 1, 2, 3, 4],
+      #  [5, 6, 7, 8, 9],
+      #  [10, 11, 12, 13, 14],
+      #  [15, 16, 17, 18, 19]]
       a[7]
-      => Cumo::DFloat#shape=[]
-      6.0
+      # => Cumo::DFloat#shape=[]
+      # 6.0
       a[1,1]
-      => Cumo::DFloat#shape=[]
-      6.0
+      # => Cumo::DFloat#shape=[]
+      # 6.0
       a[1..3,1]
-      => Cumo::DFloat#shape=[3]
-      [6, 11, 16]
+      # => Cumo::DFloat#shape=[3]
+      # [6, 11, 16]
       a[1,[1,3,4]]
-      => Cumo::DFloat#shape=[3]
-      [6, 8, 9]
+      # => Cumo::DFloat#shape=[3]
+      # [6, 8, 9]
       a[true,2].fill(99)
       a
-      => Cumo::DFloat#shape=[4,5]
-      [[0, 1, 99, 3, 4],
-       [5, 6, 99, 8, 9],
-       [10, 11, 99, 13, 14],
-       [15, 16, 99, 18, 19]]
+      # => Cumo::DFloat#shape=[4,5]
+      # [[0, 1, 99, 3, 4],
+      #  [5, 6, 99, 8, 9],
+      #  [10, 11, 99, 13, 14],
+      #  [15, 16, 99, 18, 19]]
  */
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)

data/ext/cumo/narray/gen/tmpl/aset.c CHANGED Viewed

@@ -10,31 +10,31 @@
   @example
       a = Cumo::DFloat.new(3,4).seq
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [4, 5, 6, 7],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [4, 5, 6, 7],
+      #  [8, 9, 10, 11]]
       a[1,2]=99
       a
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [4, 5, 99, 7],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [4, 5, 99, 7],
+      #  [8, 9, 10, 11]]
       a[1,[0,2]] = [101,102]
       a
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [101, 5, 102, 7],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [101, 5, 102, 7],
+      #  [8, 9, 10, 11]]
       a[1,true]=99
       a
-      => Cumo::DFloat#shape=[3,4]
-      [[0, 1, 2, 3],
-       [99, 99, 99, 99],
-       [8, 9, 10, 11]]
+      # => Cumo::DFloat#shape=[3,4]
+      # [[0, 1, 2, 3],
+      #  [99, 99, 99, 99],
+      #  [8, 9, 10, 11]]
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/batch_norm.c CHANGED Viewed

@@ -157,8 +157,11 @@ static VALUE
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
+    status = cudnnCreateTensorDescriptor(&bn_desc);
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
-    status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
+    status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var

data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c CHANGED Viewed

@@ -134,8 +134,11 @@ static VALUE
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
+    status = cudnnCreateTensorDescriptor(&bn_desc);
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
-    status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
+    status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
     if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var

data/ext/cumo/narray/gen/tmpl/bincount.c CHANGED Viewed

@@ -116,22 +116,22 @@ static VALUE
     otherwise returns UInt32 or UInt64 depending on the size along last axis.
   @example
     Cumo::Int32[0..4].bincount
-    => Cumo::UInt32#shape=[5]
-       [1, 1, 1, 1, 1]
+    # => Cumo::UInt32#shape=[5]
+    #    [1, 1, 1, 1, 1]
     Cumo::Int32[0, 1, 1, 3, 2, 1, 7].bincount
-    => Cumo::UInt32#shape=[8]
-       [1, 3, 1, 1, 0, 0, 0, 1]
+    # => Cumo::UInt32#shape=[8]
+    #    [1, 3, 1, 1, 0, 0, 0, 1]
     x = Cumo::Int32[0, 1, 1, 3, 2, 1, 7, 23]
     x.bincount.size == x.max+1
-    => true
+    # => true
     w = Cumo::DFloat[0.3, 0.5, 0.2, 0.7, 1.0, -0.6]
     x = Cumo::Int32[0, 1, 1, 2, 2, 2]
     x.bincount(w)
-    => Cumo::DFloat#shape=[3]
-       [0.3, 0.7, 1.1]
+    # => Cumo::DFloat#shape=[3]
+    #    [0.3, 0.7, 1.1]
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/clip.c CHANGED Viewed

@@ -75,28 +75,24 @@ static void
   @example
       a = Cumo::Int32.new(10).seq
-      p a.clip(1,8)
-      # Cumo::Int32#shape=[10]
-      # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
-      p a
-      # Cumo::Int32#shape=[10]
+      # => Cumo::Int32#shape=[10]
       # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-      p a.inplace.clip(3,6)
-      # Cumo::Int32(view)#shape=[10]
-      # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
+      a.clip(1,8)
+      # => Cumo::Int32#shape=[10]
+      # [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
-      p a
-      # Cumo::Int32#shape=[10]
+      a.inplace.clip(3,6)
+      a
+      # => Cumo::Int32#shape=[10]
       # [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
-      p a = Cumo::Int32.new(10).seq
-      # Cumo::Int32#shape=[10]
+      b = Cumo::Int32.new(10).seq
+      # => Cumo::Int32#shape=[10]
       # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-      p a.clip([3,4,1,1,1,4,4,4,4,4], 8)
-      # Cumo::Int32#shape=[10]
+      b.clip([3,4,1,1,1,4,4,4,4,4], 8)
+      # => Cumo::Int32#shape=[10]
       # [3, 4, 2, 3, 4, 5, 6, 7, 8, 8]
 */
 static VALUE

data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu CHANGED Viewed

@@ -79,10 +79,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (dtype*)p2, n);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (dtype*)p2, n);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (dtype*)p2, n);
     }
@@ -92,10 +92,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
     } else {
+    thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
     }
@@ -105,10 +105,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
     }
@@ -118,10 +118,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2, n);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2, n);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2, n);
     }

data/ext/cumo/narray/gen/tmpl/cum.c CHANGED Viewed

@@ -30,7 +30,7 @@ static void
 /*
   <%=name%> of self.
   @overload <%=name%>(axis:nil, nan:false)
-  @param [Numeric,Array,Range] axis  Affected dimensions.
+  @param [Numeric,Array,Range] axis  Performs <%=name%> along the axis.
   @param [TrueClass] nan  If true, apply NaN-aware algorithm (avoid NaN if exists).
   @return [Cumo::<%=class_name%>] <%=name%> of self.
 */

data/ext/cumo/narray/gen/tmpl/each.c CHANGED Viewed

@@ -34,8 +34,10 @@ static void
   passing that element as a parameter.
   @overload <%=name%>
   @return [Cumo::NArray] self
-  For a block {|x| ... }
-  @yield [x]  x is element of NArray.
+  For a block `{|x| ... }`,
+  @yieldparam [Numeric] x  an element of NArray.
+  @see #each_with_index
+  @see #map
 */
 static VALUE
 <%=c_func(0)%>(VALUE self)

data/ext/cumo/narray/gen/tmpl/each_with_index.c CHANGED Viewed

@@ -55,9 +55,12 @@ static void
   Invokes the given block once for each element of self,
   passing that element and indices along each axis as parameters.
   @overload <%=name%>
+  For a block `{|x,i,j,...| ... }`,
+  @yieldparam [Numeric] x  an element
+  @yieldparam [Integer] i,j,...  multidimensional indices
   @return [Cumo::NArray] self
-  For a block {|x,i,j,...| ... }
-  @yield [x,i,j,...]  x is an element, i,j,... are multidimensional indices.
+  @see #each
+  @see #map_with_index
 */
 static VALUE
 <%=c_func(0)%>(VALUE self)

data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c CHANGED Viewed

@@ -106,8 +106,11 @@ static VALUE
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
     if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
+    status = cudnnCreateTensorDescriptor(&bn_desc);
+    if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
-    status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
+    status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
     if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var

data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu CHANGED Viewed

@@ -57,10 +57,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
     }
@@ -70,10 +70,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
     }
@@ -83,10 +83,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
     }
@@ -96,10 +96,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
-    thrust::device_ptr<dtype> data_end   = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
-    if (s1_idx == 1) {
-        cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
+    if (s1_idx == 1 || n == 1) {
+        cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
     } else {
+        thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
         cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
         cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
     }

data/ext/cumo/narray/gen/tmpl/logseq.c CHANGED Viewed

@@ -62,7 +62,7 @@ static void
 /*
   Set logarithmic sequence of numbers to self. The sequence is obtained from
-     base**(beg+i*step)
+     `base**(beg+i*step)`
   where i is 1-dimensional index.
   Applicable classes: DFloat, SFloat, DComplex, SComplex.
@@ -74,11 +74,12 @@ static void
   @example
     Cumo::DFloat.new(5).logseq(4,-1,2)
-    => Cumo::DFloat#shape=[5]
-      [16, 8, 4, 2, 1]
+    # => Cumo::DFloat#shape=[5]
+    #   [16, 8, 4, 2, 1]
     Cumo::DComplex.new(5).logseq(0,1i*Math::PI/3,Math::E)
-    => Cumo::DComplex#shape=[5]
-      [1+7.26156e-310i, 0.5+0.866025i, -0.5+0.866025i, -1+1.22465e-16i, ...]
+    # => Cumo::DComplex#shape=[5]
+    #   [1+7.26156e-310i, 0.5+0.866025i, -0.5+0.866025i, -1+1.22465e-16i, ...]
 */
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE *args, VALUE self)

data/ext/cumo/narray/gen/tmpl/map_with_index.c CHANGED Viewed

@@ -78,14 +78,13 @@ static void
   passing that element and indices along each axis as parameters.
   Creates a new NArray containing the values returned by the block.
   Inplace option is allowed, i.e., `nary.inplace.map` overwrites `nary`.
   @overload <%=name%>
-  For a block {|x,i,j,...| ... }
-  @yield [x,i,j,...]  x is an element, i,j,... are multidimensional indices.
+  For a block `{|x,i,j,...| ... }`,
+  @yieldparam [Numeric] x  an element
+  @yieldparam [Integer] i,j,...  multidimensional indices
   @return [Cumo::NArray] mapped array
+  @see #map
+  @see #each_with_index
 */
 static VALUE
 <%=c_func(0)%>(VALUE self)

data/ext/cumo/narray/gen/tmpl/median.c CHANGED Viewed

@@ -40,8 +40,8 @@ static void
 <% else %>
   @overload <%=name%>(axis:nil, keepdims:false)
 <% end %>
-  @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
-  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
+  @param [Numeric,Array,Range] axis  Finds <%=name%> along the axis.
+  @param [TrueClass] keepdims  If true, the reduced axes are left in the result array as dimensions with size one.
   @return [Cumo::<%=class_name%>] returns <%=name%> of self.
 */

data/ext/cumo/narray/gen/tmpl/minmax.c CHANGED Viewed

@@ -26,7 +26,7 @@ static void
 <% else %>
   @overload <%=name%>(axis:nil, keepdims:false)
 <% end %>
-  @param [Numeric,Array,Range] axis (keyword) Affected dimensions.
+  @param [Numeric,Array,Range] axis  Finds min-max along the axis.
   @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
   @return [Cumo::<%=class_name%>,Cumo::<%=class_name%>] min and max of self.
 */