cumo 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +18 -37
- data/3rd_party/mkmf-cu/lib/mkmf-cu/cli.rb +28 -21
- data/CHANGELOG.md +28 -0
- data/Dockerfile +34 -0
- data/cumo.gemspec +1 -1
- data/docker-build.sh +4 -0
- data/docker-launch.sh +4 -0
- data/docs/src-tree.md +1 -1
- data/ext/cumo/cuda/cudnn_impl.cpp +25 -3
- data/ext/cumo/cuda/driver.c +8 -0
- data/ext/cumo/depend.erb +1 -1
- data/ext/cumo/extconf.rb +1 -1
- data/ext/cumo/include/cumo/cuda/cumo_thrust.hpp +14 -7
- data/ext/cumo/include/cumo/cuda/cumo_thrust_complex.hpp +3 -3
- data/ext/cumo/include/cumo/narray.h +2 -0
- data/ext/cumo/include/cumo/types/complex.h +2 -2
- data/ext/cumo/include/cumo/types/complex_macro_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/real_accum_kernel.h +15 -4
- data/ext/cumo/include/cumo/types/xint_macro_kernel.h +11 -3
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/narray/array.c +5 -3
- data/ext/cumo/narray/data.c +25 -26
- data/ext/cumo/narray/gen/tmpl/accum.c +2 -2
- data/ext/cumo/narray/gen/tmpl/accum_binary.c +1 -1
- data/ext/cumo/narray/gen/tmpl/alloc_func.c +4 -1
- data/ext/cumo/narray/gen/tmpl/allocate.c +1 -0
- data/ext/cumo/narray/gen/tmpl/aref.c +18 -18
- data/ext/cumo/narray/gen/tmpl/aset.c +16 -16
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +4 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +4 -1
- data/ext/cumo/narray/gen/tmpl/bincount.c +7 -7
- data/ext/cumo/narray/gen/tmpl/clip.c +11 -15
- data/ext/cumo/narray/gen/tmpl/complex_accum_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/cum.c +1 -1
- data/ext/cumo/narray/gen/tmpl/each.c +4 -2
- data/ext/cumo/narray/gen/tmpl/each_with_index.c +5 -2
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +4 -1
- data/ext/cumo/narray/gen/tmpl/float_accum_kernel.cu +12 -12
- data/ext/cumo/narray/gen/tmpl/logseq.c +6 -5
- data/ext/cumo/narray/gen/tmpl/map_with_index.c +5 -6
- data/ext/cumo/narray/gen/tmpl/median.c +2 -2
- data/ext/cumo/narray/gen/tmpl/minmax.c +1 -1
- data/ext/cumo/narray/gen/tmpl/poly.c +4 -4
- data/ext/cumo/narray/gen/tmpl/rand.c +8 -6
- data/ext/cumo/narray/gen/tmpl/rand_norm.c +18 -16
- data/ext/cumo/narray/gen/tmpl/seq.c +5 -4
- data/ext/cumo/narray/gen/tmpl/sort.c +2 -2
- data/ext/cumo/narray/gen/tmpl/sort_index.c +2 -2
- data/ext/cumo/narray/gen/tmpl_bit/allocate.c +1 -0
- data/ext/cumo/narray/gen/tmpl_bit/aref.c +26 -32
- data/ext/cumo/narray/gen/tmpl_bit/aset.c +18 -30
- data/ext/cumo/narray/index.c +1 -1
- data/ext/cumo/narray/narray.c +116 -21
- data/lib/cumo/narray/extra.rb +160 -156
- data/test/cuda/device_test.rb +2 -1
- data/test/cudnn_test.rb +2 -2
- data/test/narray_test.rb +80 -0
- data/test/ractor_test.rb +5 -3
- metadata +5 -2
|
@@ -70,18 +70,26 @@ __host__ __device__ static inline dtype f_minimum(dtype x, dtype y)
|
|
|
70
70
|
/* --------- thrust ----------------- */
|
|
71
71
|
#include "cumo/cuda/cumo_thrust.hpp"
|
|
72
72
|
|
|
73
|
-
struct cumo_thrust_plus
|
|
73
|
+
struct cumo_thrust_plus
|
|
74
74
|
{
|
|
75
|
+
using first_argument_type = dtype;
|
|
76
|
+
using second_argument_type = dtype;
|
|
77
|
+
using result_type = dtype;
|
|
75
78
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_add(x,y); }
|
|
76
79
|
};
|
|
77
80
|
|
|
78
|
-
struct cumo_thrust_multiplies
|
|
81
|
+
struct cumo_thrust_multiplies
|
|
79
82
|
{
|
|
83
|
+
using first_argument_type = dtype;
|
|
84
|
+
using second_argument_type = dtype;
|
|
85
|
+
using result_type = dtype;
|
|
80
86
|
__host__ __device__ dtype operator()(dtype x, dtype y) { return m_mul(x,y); }
|
|
81
87
|
};
|
|
82
88
|
|
|
83
|
-
struct cumo_thrust_square
|
|
89
|
+
struct cumo_thrust_square
|
|
84
90
|
{
|
|
91
|
+
using argument_type = dtype;
|
|
92
|
+
using result_type = dtype;
|
|
85
93
|
__host__ __device__ rtype operator()(const dtype& x) const { return m_square(x); }
|
|
86
94
|
};
|
|
87
95
|
|
data/ext/cumo/include/cumo.h
CHANGED
data/ext/cumo/narray/array.c
CHANGED
|
@@ -466,11 +466,13 @@ cumo_na_s_array_shape(VALUE mod, VALUE ary)
|
|
|
466
466
|
@return [Cumo::NArray]
|
|
467
467
|
@example
|
|
468
468
|
Cumo::NArray.new_like([[1,2,3],[4,5,6]])
|
|
469
|
-
=> Cumo::Int32#shape=[2,3](empty)
|
|
469
|
+
# => Cumo::Int32#shape=[2,3](empty)
|
|
470
|
+
|
|
470
471
|
Cumo::DFloat.new_like([[1,2],[3,4]])
|
|
471
|
-
=> Cumo::DFloat#shape=[2,2](empty)
|
|
472
|
+
# => Cumo::DFloat#shape=[2,2](empty)
|
|
473
|
+
|
|
472
474
|
Cumo::NArray.new_like([1,2i,3])
|
|
473
|
-
=> Cumo::DComplex#shape=[3](empty)
|
|
475
|
+
# => Cumo::DComplex#shape=[3](empty)
|
|
474
476
|
*/
|
|
475
477
|
VALUE
|
|
476
478
|
cumo_na_s_new_like(VALUE type, VALUE obj)
|
data/ext/cumo/narray/data.c
CHANGED
|
@@ -195,21 +195,21 @@ check_axis(int axis, int ndim)
|
|
|
195
195
|
@example
|
|
196
196
|
x = Cumo::Int32[[1,2,3]]
|
|
197
197
|
|
|
198
|
-
|
|
199
|
-
# Cumo::Int32(view)#shape=[3,1]
|
|
198
|
+
x.swapaxes(0,1)
|
|
199
|
+
# => Cumo::Int32(view)#shape=[3,1]
|
|
200
200
|
# [[1],
|
|
201
201
|
# [2],
|
|
202
202
|
# [3]]
|
|
203
203
|
|
|
204
|
-
|
|
205
|
-
# Cumo::Int32#shape=[2,2,2]
|
|
204
|
+
x = Cumo::Int32[[[0,1],[2,3]],[[4,5],[6,7]]]
|
|
205
|
+
# => Cumo::Int32#shape=[2,2,2]
|
|
206
206
|
# [[[0, 1],
|
|
207
207
|
# [2, 3]],
|
|
208
208
|
# [[4, 5],
|
|
209
209
|
# [6, 7]]]
|
|
210
210
|
|
|
211
|
-
|
|
212
|
-
# Cumo::Int32(view)#shape=[2,2,2]
|
|
211
|
+
x.swapaxes(0,2)
|
|
212
|
+
# => Cumo::Int32(view)#shape=[2,2,2]
|
|
213
213
|
# [[[0, 4],
|
|
214
214
|
# [2, 6]],
|
|
215
215
|
# [[1, 5],
|
|
@@ -510,7 +510,6 @@ cumo_na_flatten_dim(VALUE self, int sd)
|
|
|
510
510
|
case CUMO_NARRAY_FILEMAP_T:
|
|
511
511
|
stride = cumo_na_element_stride(self);
|
|
512
512
|
for (i=sd+1; i--; ) {
|
|
513
|
-
//printf("data: i=%d shpae[i]=%ld stride=%ld\n",i,shape[i],stride);
|
|
514
513
|
CUMO_SDX_SET_STRIDE(na2->stridx[i],stride);
|
|
515
514
|
stride *= shape[i];
|
|
516
515
|
}
|
|
@@ -533,12 +532,10 @@ cumo_na_flatten_dim(VALUE self, int sd)
|
|
|
533
532
|
CUMO_SDX_SET_INDEX(na2->stridx[i],idx2);
|
|
534
533
|
} else {
|
|
535
534
|
na2->stridx[i] = na1->stridx[i];
|
|
536
|
-
//printf("view: i=%d stridx=%d\n",i,CUMO_SDX_GET_STRIDE(sdx));
|
|
537
535
|
}
|
|
538
536
|
}
|
|
539
537
|
// flat dimenion == last dimension
|
|
540
538
|
if (RTEST(cumo_na_check_ladder(self,sd))) {
|
|
541
|
-
//if (0) {
|
|
542
539
|
na2->stridx[sd] = na1->stridx[nd-1];
|
|
543
540
|
} else {
|
|
544
541
|
// set index
|
|
@@ -607,28 +604,30 @@ void cumo_na_diagonal_stride_index_kernel_launch(size_t *idx, ssize_t s0, size_t
|
|
|
607
604
|
@return [Cumo::NArray] diagonal view of NArray.
|
|
608
605
|
@example
|
|
609
606
|
a = Cumo::DFloat.new(4,5).seq
|
|
610
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
611
|
-
[[0, 1, 2, 3, 4],
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
607
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
608
|
+
# [[0, 1, 2, 3, 4],
|
|
609
|
+
# [5, 6, 7, 8, 9],
|
|
610
|
+
# [10, 11, 12, 13, 14],
|
|
611
|
+
# [15, 16, 17, 18, 19]]
|
|
615
612
|
b = a.diagonal(1)
|
|
616
|
-
=> Cumo::DFloat(view)#shape=[4]
|
|
617
|
-
[1, 7, 13, 19]
|
|
613
|
+
# => Cumo::DFloat(view)#shape=[4]
|
|
614
|
+
# [1, 7, 13, 19]
|
|
615
|
+
|
|
618
616
|
b.store(0)
|
|
619
617
|
a
|
|
620
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
621
|
-
[[0, 0, 2, 3, 4],
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
618
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
619
|
+
# [[0, 0, 2, 3, 4],
|
|
620
|
+
# [5, 6, 0, 8, 9],
|
|
621
|
+
# [10, 11, 12, 0, 14],
|
|
622
|
+
# [15, 16, 17, 18, 0]]
|
|
623
|
+
|
|
625
624
|
b.store([1,2,3,4])
|
|
626
625
|
a
|
|
627
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
628
|
-
[[0, 1, 2, 3, 4],
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
626
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
627
|
+
# [[0, 1, 2, 3, 4],
|
|
628
|
+
# [5, 6, 2, 8, 9],
|
|
629
|
+
# [10, 11, 12, 3, 14],
|
|
630
|
+
# [15, 16, 17, 18, 4]]
|
|
632
631
|
*/
|
|
633
632
|
static VALUE
|
|
634
633
|
cumo_na_diagonal(int argc, VALUE *argv, VALUE self)
|
|
@@ -56,8 +56,8 @@ static void
|
|
|
56
56
|
<% else %>
|
|
57
57
|
@overload <%=name%>(axis:nil, keepdims:false)
|
|
58
58
|
<% end %>
|
|
59
|
-
@param [Numeric,Array,Range] axis
|
|
60
|
-
@param [TrueClass] keepdims
|
|
59
|
+
@param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
|
|
60
|
+
@param [TrueClass] keepdims If true, the reduced axes are left in the result array as dimensions with size one.
|
|
61
61
|
@return [Cumo::<%=class_name%>] returns result of <%=name%>.
|
|
62
62
|
*/
|
|
63
63
|
static VALUE
|
|
@@ -91,7 +91,7 @@ static VALUE
|
|
|
91
91
|
@overload <%=op_map%>(other, axis:nil, keepdims:false)
|
|
92
92
|
<% end %>
|
|
93
93
|
@param [Cumo::NArray,Numeric] other
|
|
94
|
-
@param [Numeric,Array,Range] axis
|
|
94
|
+
@param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
|
|
95
95
|
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
|
|
96
96
|
<% if is_float %>
|
|
97
97
|
@param [TrueClass] nan (keyword) If true, apply NaN-aware algorithm (avoid NaN if exists).
|
|
@@ -29,7 +29,9 @@ static void
|
|
|
29
29
|
assert(na->base.type == CUMO_NARRAY_DATA_T);
|
|
30
30
|
|
|
31
31
|
if (na->ptr != NULL) {
|
|
32
|
-
|
|
32
|
+
if (na->owned) {
|
|
33
|
+
cumo_cuda_runtime_free(na->ptr);
|
|
34
|
+
}
|
|
33
35
|
na->ptr = NULL;
|
|
34
36
|
}
|
|
35
37
|
if (na->base.size > 0) {
|
|
@@ -103,5 +105,6 @@ static VALUE
|
|
|
103
105
|
na->base.shape = NULL;
|
|
104
106
|
na->base.reduce = INT2FIX(0);
|
|
105
107
|
na->ptr = NULL;
|
|
108
|
+
na->owned = FALSE;
|
|
106
109
|
return TypedData_Wrap_Struct(klass, &<%=type_name%>_data_type, (void*)na);
|
|
107
110
|
}
|
|
@@ -21,35 +21,35 @@ static VALUE
|
|
|
21
21
|
|
|
22
22
|
@example
|
|
23
23
|
a = Cumo::DFloat.new(4,5).seq
|
|
24
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
25
|
-
[[0, 1, 2, 3, 4],
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
24
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
25
|
+
# [[0, 1, 2, 3, 4],
|
|
26
|
+
# [5, 6, 7, 8, 9],
|
|
27
|
+
# [10, 11, 12, 13, 14],
|
|
28
|
+
# [15, 16, 17, 18, 19]]
|
|
29
29
|
|
|
30
30
|
a[7]
|
|
31
|
-
=> Cumo::DFloat#shape=[]
|
|
32
|
-
6.0
|
|
31
|
+
# => Cumo::DFloat#shape=[]
|
|
32
|
+
# 6.0
|
|
33
33
|
|
|
34
34
|
a[1,1]
|
|
35
|
-
=> Cumo::DFloat#shape=[]
|
|
36
|
-
6.0
|
|
35
|
+
# => Cumo::DFloat#shape=[]
|
|
36
|
+
# 6.0
|
|
37
37
|
|
|
38
38
|
a[1..3,1]
|
|
39
|
-
=> Cumo::DFloat#shape=[3]
|
|
40
|
-
[6, 11, 16]
|
|
39
|
+
# => Cumo::DFloat#shape=[3]
|
|
40
|
+
# [6, 11, 16]
|
|
41
41
|
|
|
42
42
|
a[1,[1,3,4]]
|
|
43
|
-
=> Cumo::DFloat#shape=[3]
|
|
44
|
-
[6, 8, 9]
|
|
43
|
+
# => Cumo::DFloat#shape=[3]
|
|
44
|
+
# [6, 8, 9]
|
|
45
45
|
|
|
46
46
|
a[true,2].fill(99)
|
|
47
47
|
a
|
|
48
|
-
=> Cumo::DFloat#shape=[4,5]
|
|
49
|
-
[[0, 1, 99, 3, 4],
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
48
|
+
# => Cumo::DFloat#shape=[4,5]
|
|
49
|
+
# [[0, 1, 99, 3, 4],
|
|
50
|
+
# [5, 6, 99, 8, 9],
|
|
51
|
+
# [10, 11, 99, 13, 14],
|
|
52
|
+
# [15, 16, 99, 18, 19]]
|
|
53
53
|
*/
|
|
54
54
|
static VALUE
|
|
55
55
|
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
|
|
@@ -10,31 +10,31 @@
|
|
|
10
10
|
|
|
11
11
|
@example
|
|
12
12
|
a = Cumo::DFloat.new(3,4).seq
|
|
13
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
14
|
-
[[0, 1, 2, 3],
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
14
|
+
# [[0, 1, 2, 3],
|
|
15
|
+
# [4, 5, 6, 7],
|
|
16
|
+
# [8, 9, 10, 11]]
|
|
17
17
|
|
|
18
18
|
a[1,2]=99
|
|
19
19
|
a
|
|
20
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
21
|
-
[[0, 1, 2, 3],
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
21
|
+
# [[0, 1, 2, 3],
|
|
22
|
+
# [4, 5, 99, 7],
|
|
23
|
+
# [8, 9, 10, 11]]
|
|
24
24
|
|
|
25
25
|
a[1,[0,2]] = [101,102]
|
|
26
26
|
a
|
|
27
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
28
|
-
[[0, 1, 2, 3],
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
28
|
+
# [[0, 1, 2, 3],
|
|
29
|
+
# [101, 5, 102, 7],
|
|
30
|
+
# [8, 9, 10, 11]]
|
|
31
31
|
|
|
32
32
|
a[1,true]=99
|
|
33
33
|
a
|
|
34
|
-
=> Cumo::DFloat#shape=[3,4]
|
|
35
|
-
[[0, 1, 2, 3],
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
# => Cumo::DFloat#shape=[3,4]
|
|
35
|
+
# [[0, 1, 2, 3],
|
|
36
|
+
# [99, 99, 99, 99],
|
|
37
|
+
# [8, 9, 10, 11]]
|
|
38
38
|
|
|
39
39
|
*/
|
|
40
40
|
static VALUE
|
|
@@ -157,8 +157,11 @@ static VALUE
|
|
|
157
157
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
|
158
158
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
|
|
159
159
|
|
|
160
|
+
status = cudnnCreateTensorDescriptor(&bn_desc);
|
|
161
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
|
|
162
|
+
|
|
160
163
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
|
161
|
-
status =
|
|
164
|
+
status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
|
|
162
165
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
|
|
163
166
|
// TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
|
|
164
167
|
|
|
@@ -134,8 +134,11 @@ static VALUE
|
|
|
134
134
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
|
135
135
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
|
136
136
|
|
|
137
|
+
status = cudnnCreateTensorDescriptor(&bn_desc);
|
|
138
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
|
139
|
+
|
|
137
140
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
|
138
|
-
status =
|
|
141
|
+
status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
|
|
139
142
|
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
|
140
143
|
// TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
|
|
141
144
|
|
|
@@ -116,22 +116,22 @@ static VALUE
|
|
|
116
116
|
otherwise returns UInt32 or UInt64 depending on the size along last axis.
|
|
117
117
|
@example
|
|
118
118
|
Cumo::Int32[0..4].bincount
|
|
119
|
-
=> Cumo::UInt32#shape=[5]
|
|
120
|
-
|
|
119
|
+
# => Cumo::UInt32#shape=[5]
|
|
120
|
+
# [1, 1, 1, 1, 1]
|
|
121
121
|
|
|
122
122
|
Cumo::Int32[0, 1, 1, 3, 2, 1, 7].bincount
|
|
123
|
-
=> Cumo::UInt32#shape=[8]
|
|
124
|
-
|
|
123
|
+
# => Cumo::UInt32#shape=[8]
|
|
124
|
+
# [1, 3, 1, 1, 0, 0, 0, 1]
|
|
125
125
|
|
|
126
126
|
x = Cumo::Int32[0, 1, 1, 3, 2, 1, 7, 23]
|
|
127
127
|
x.bincount.size == x.max+1
|
|
128
|
-
=> true
|
|
128
|
+
# => true
|
|
129
129
|
|
|
130
130
|
w = Cumo::DFloat[0.3, 0.5, 0.2, 0.7, 1.0, -0.6]
|
|
131
131
|
x = Cumo::Int32[0, 1, 1, 2, 2, 2]
|
|
132
132
|
x.bincount(w)
|
|
133
|
-
=> Cumo::DFloat#shape=[3]
|
|
134
|
-
|
|
133
|
+
# => Cumo::DFloat#shape=[3]
|
|
134
|
+
# [0.3, 0.7, 1.1]
|
|
135
135
|
|
|
136
136
|
*/
|
|
137
137
|
static VALUE
|
|
@@ -75,28 +75,24 @@ static void
|
|
|
75
75
|
|
|
76
76
|
@example
|
|
77
77
|
a = Cumo::Int32.new(10).seq
|
|
78
|
-
|
|
79
|
-
# Cumo::Int32#shape=[10]
|
|
80
|
-
# [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
|
|
81
|
-
|
|
82
|
-
p a
|
|
83
|
-
# Cumo::Int32#shape=[10]
|
|
78
|
+
# => Cumo::Int32#shape=[10]
|
|
84
79
|
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
85
80
|
|
|
86
|
-
|
|
87
|
-
# Cumo::Int32
|
|
88
|
-
# [
|
|
81
|
+
a.clip(1,8)
|
|
82
|
+
# => Cumo::Int32#shape=[10]
|
|
83
|
+
# [1, 1, 2, 3, 4, 5, 6, 7, 8, 8]
|
|
89
84
|
|
|
90
|
-
|
|
91
|
-
|
|
85
|
+
a.inplace.clip(3,6)
|
|
86
|
+
a
|
|
87
|
+
# => Cumo::Int32#shape=[10]
|
|
92
88
|
# [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
|
|
93
89
|
|
|
94
|
-
|
|
95
|
-
# Cumo::Int32#shape=[10]
|
|
90
|
+
b = Cumo::Int32.new(10).seq
|
|
91
|
+
# => Cumo::Int32#shape=[10]
|
|
96
92
|
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
97
93
|
|
|
98
|
-
|
|
99
|
-
# Cumo::Int32#shape=[10]
|
|
94
|
+
b.clip([3,4,1,1,1,4,4,4,4,4], 8)
|
|
95
|
+
# => Cumo::Int32#shape=[10]
|
|
100
96
|
# [3, 4, 2, 3, 4, 5, 6, 7, 8, 8]
|
|
101
97
|
*/
|
|
102
98
|
static VALUE
|
|
@@ -79,10 +79,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
|
|
|
79
79
|
{
|
|
80
80
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
81
81
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (dtype*)p2, n);
|
|
82
|
+
if (s1_idx == 1 || n == 1) {
|
|
83
|
+
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (dtype*)p2, n);
|
|
85
84
|
} else {
|
|
85
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
86
86
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
87
87
|
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (dtype*)p2, n);
|
|
88
88
|
}
|
|
@@ -92,10 +92,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
92
92
|
{
|
|
93
93
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
94
94
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
|
|
95
|
+
if (s1_idx == 1 || n == 1) {
|
|
96
|
+
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
|
|
98
97
|
} else {
|
|
98
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
99
99
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
100
100
|
cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
|
|
101
101
|
}
|
|
@@ -105,10 +105,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
|
|
|
105
105
|
{
|
|
106
106
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
107
107
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2);
|
|
108
|
+
if (s1_idx == 1 || n == 1) {
|
|
109
|
+
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2);
|
|
111
110
|
} else {
|
|
111
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
112
112
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
113
113
|
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2);
|
|
114
114
|
}
|
|
@@ -118,10 +118,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
118
118
|
{
|
|
119
119
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
120
120
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (rtype*)p2, n);
|
|
121
|
+
if (s1_idx == 1 || n == 1) {
|
|
122
|
+
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (rtype*)p2, n);
|
|
124
123
|
} else {
|
|
124
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
125
125
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
126
126
|
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (rtype*)p2, n);
|
|
127
127
|
}
|
|
@@ -30,7 +30,7 @@ static void
|
|
|
30
30
|
/*
|
|
31
31
|
<%=name%> of self.
|
|
32
32
|
@overload <%=name%>(axis:nil, nan:false)
|
|
33
|
-
@param [Numeric,Array,Range] axis
|
|
33
|
+
@param [Numeric,Array,Range] axis Performs <%=name%> along the axis.
|
|
34
34
|
@param [TrueClass] nan If true, apply NaN-aware algorithm (avoid NaN if exists).
|
|
35
35
|
@return [Cumo::<%=class_name%>] <%=name%> of self.
|
|
36
36
|
*/
|
|
@@ -34,8 +34,10 @@ static void
|
|
|
34
34
|
passing that element as a parameter.
|
|
35
35
|
@overload <%=name%>
|
|
36
36
|
@return [Cumo::NArray] self
|
|
37
|
-
For a block {|x| ... }
|
|
38
|
-
@
|
|
37
|
+
For a block `{|x| ... }`,
|
|
38
|
+
@yieldparam [Numeric] x an element of NArray.
|
|
39
|
+
@see #each_with_index
|
|
40
|
+
@see #map
|
|
39
41
|
*/
|
|
40
42
|
static VALUE
|
|
41
43
|
<%=c_func(0)%>(VALUE self)
|
|
@@ -55,9 +55,12 @@ static void
|
|
|
55
55
|
Invokes the given block once for each element of self,
|
|
56
56
|
passing that element and indices along each axis as parameters.
|
|
57
57
|
@overload <%=name%>
|
|
58
|
+
For a block `{|x,i,j,...| ... }`,
|
|
59
|
+
@yieldparam [Numeric] x an element
|
|
60
|
+
@yieldparam [Integer] i,j,... multitimensional indices
|
|
58
61
|
@return [Cumo::NArray] self
|
|
59
|
-
|
|
60
|
-
@
|
|
62
|
+
@see #each
|
|
63
|
+
@see #map_with_index
|
|
61
64
|
*/
|
|
62
65
|
static VALUE
|
|
63
66
|
<%=c_func(0)%>(VALUE self)
|
|
@@ -106,8 +106,11 @@ static VALUE
|
|
|
106
106
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
|
107
107
|
if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
|
|
108
108
|
|
|
109
|
+
status = cudnnCreateTensorDescriptor(&bn_desc);
|
|
110
|
+
if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
|
|
111
|
+
|
|
109
112
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
|
110
|
-
status =
|
|
113
|
+
status = cudnnDeriveBNTensorDescriptor(bn_desc, x_desc, mode);
|
|
111
114
|
if (status != CUDNN_STATUS_SUCCESS) goto FIXED_BATCH_NORM_ERROR;
|
|
112
115
|
// TODO: bn_desc may return another type, and may need to cast gamma, beta, mean, var
|
|
113
116
|
|
|
@@ -57,10 +57,10 @@ void cumo_<%=type_name%>_mean_kernel_launch(uint64_t n, char *p1, ssize_t s1, ch
|
|
|
57
57
|
{
|
|
58
58
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
59
59
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
|
|
60
|
+
if (s1_idx == 1 || n == 1) {
|
|
61
|
+
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
|
|
63
62
|
} else {
|
|
63
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
64
64
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
65
65
|
cumo_<%=type_name%>_mean_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
|
|
66
66
|
}
|
|
@@ -70,10 +70,10 @@ void cumo_<%=type_name%>_var_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
70
70
|
{
|
|
71
71
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
72
72
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
|
|
73
|
+
if (s1_idx == 1 || n == 1) {
|
|
74
|
+
cumo_<%=type_name%>_var_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
|
|
76
75
|
} else {
|
|
76
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
77
77
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
78
78
|
cumo_<%=type_name%>_var_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
|
|
79
79
|
}
|
|
@@ -83,10 +83,10 @@ void cumo_<%=type_name%>_stddev_kernel_launch(uint64_t n, char *p1, ssize_t s1,
|
|
|
83
83
|
{
|
|
84
84
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
85
85
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2);
|
|
86
|
+
if (s1_idx == 1 || n == 1) {
|
|
87
|
+
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2);
|
|
89
88
|
} else {
|
|
89
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
90
90
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
91
91
|
cumo_<%=type_name%>_stddev_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2);
|
|
92
92
|
}
|
|
@@ -96,10 +96,10 @@ void cumo_<%=type_name%>_rms_kernel_launch(uint64_t n, char *p1, ssize_t s1, cha
|
|
|
96
96
|
{
|
|
97
97
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
|
98
98
|
thrust::device_ptr<dtype> data_begin = thrust::device_pointer_cast((dtype*)p1);
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_end, (<%=dtype%>*)p2, n);
|
|
99
|
+
if (s1_idx == 1 || n == 1) {
|
|
100
|
+
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(data_begin, data_begin + n, (<%=dtype%>*)p2, n);
|
|
102
101
|
} else {
|
|
102
|
+
thrust::device_ptr<dtype> data_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
|
103
103
|
cumo_thrust_strided_range<thrust::device_vector<dtype>::iterator> range(data_begin, data_end, s1_idx);
|
|
104
104
|
cumo_<%=type_name%>_rms_kernel<<<1,1>>>(range.begin(), range.end(), (<%=dtype%>*)p2, n);
|
|
105
105
|
}
|
|
@@ -62,7 +62,7 @@ static void
|
|
|
62
62
|
|
|
63
63
|
/*
|
|
64
64
|
Set logarithmic sequence of numbers to self. The sequence is obtained from
|
|
65
|
-
base**(beg+i*step)
|
|
65
|
+
`base**(beg+i*step)`
|
|
66
66
|
where i is 1-dimensional index.
|
|
67
67
|
Applicable classes: DFloat, SFloat, DComplex, SCopmplex.
|
|
68
68
|
|
|
@@ -74,11 +74,12 @@ static void
|
|
|
74
74
|
|
|
75
75
|
@example
|
|
76
76
|
Cumo::DFloat.new(5).logseq(4,-1,2)
|
|
77
|
-
=> Cumo::DFloat#shape=[5]
|
|
78
|
-
|
|
77
|
+
# => Cumo::DFloat#shape=[5]
|
|
78
|
+
# [16, 8, 4, 2, 1]
|
|
79
|
+
|
|
79
80
|
Cumo::DComplex.new(5).logseq(0,1i*Math::PI/3,Math::E)
|
|
80
|
-
=> Cumo::DComplex#shape=[5]
|
|
81
|
-
|
|
81
|
+
# => Cumo::DComplex#shape=[5]
|
|
82
|
+
# [1+7.26156e-310i, 0.5+0.866025i, -0.5+0.866025i, -1+1.22465e-16i, ...]
|
|
82
83
|
*/
|
|
83
84
|
static VALUE
|
|
84
85
|
<%=c_func(-1)%>(int argc, VALUE *args, VALUE self)
|
|
@@ -78,14 +78,13 @@ static void
|
|
|
78
78
|
passing that element and indices along each axis as parameters.
|
|
79
79
|
Creates a new NArray containing the values returned by the block.
|
|
80
80
|
Inplace option is allowed, i.e., `nary.inplace.map` overwrites `nary`.
|
|
81
|
-
|
|
82
81
|
@overload <%=name%>
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
@
|
|
86
|
-
|
|
82
|
+
For a block `{|x,i,j,...| ... }`,
|
|
83
|
+
@yieldparam [Numeric] x an element
|
|
84
|
+
@yieldparam [Integer] i,j,... multitimensional indices
|
|
87
85
|
@return [Cumo::NArray] mapped array
|
|
88
|
-
|
|
86
|
+
@see #map
|
|
87
|
+
@see #each_with_index
|
|
89
88
|
*/
|
|
90
89
|
static VALUE
|
|
91
90
|
<%=c_func(0)%>(VALUE self)
|
|
@@ -40,8 +40,8 @@ static void
|
|
|
40
40
|
<% else %>
|
|
41
41
|
@overload <%=name%>(axis:nil, keepdims:false)
|
|
42
42
|
<% end %>
|
|
43
|
-
@param [Numeric,Array,Range] axis
|
|
44
|
-
@param [TrueClass] keepdims
|
|
43
|
+
@param [Numeric,Array,Range] axis Finds <%=name%> along the axis.
|
|
44
|
+
@param [TrueClass] keepdims If true, the reduced axes are left in the result array as dimensions with size one.
|
|
45
45
|
@return [Cumo::<%=class_name%>] returns <%=name%> of self.
|
|
46
46
|
*/
|
|
47
47
|
|
|
@@ -26,7 +26,7 @@ static void
|
|
|
26
26
|
<% else %>
|
|
27
27
|
@overload <%=name%>(axis:nil, keepdims:false)
|
|
28
28
|
<% end %>
|
|
29
|
-
@param [Numeric,Array,Range] axis
|
|
29
|
+
@param [Numeric,Array,Range] axis Finds min-max along the axis.
|
|
30
30
|
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
|
|
31
31
|
@return [Cumo::<%=class_name%>,Cumo::<%=class_name%>] min and max of self.
|
|
32
32
|
*/
|