cumo 0.4.1 → 0.4.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/cumo/cuda/cudnn_impl.cpp +19 -8
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cudnn.h +9 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +33 -22
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +29 -27
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +1 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +3 -1
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +18 -18
- data/ext/cumo/narray/index.c +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4886418f8fc62df52cc7b235e755e2d7733221b39d46f2f68e25a1ee49923487
+  data.tar.gz: 7476161b95f04d560397d6d2d6abf64bfbf9987a10559ea90e66fa12275075bb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 543fc92511dc863926f970ff23d811fc7975d182b74f11053ace504e68a6fd70b607c132a46bdf6f9396131560885d92a91a8c91544e0524d6a3cad12b3cdb64
+  data.tar.gz: 610458caead3fef458aba25384ad817ff27e727f01d3452d165332163b93c64b14934b7d9546c942fc60920d63e6fb5e745180c1c69ad6ee0d2f96f3d8a9887b
data/ext/cumo/cuda/cudnn_impl.cpp
CHANGED
@@ -540,29 +540,40 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
 }
 
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape)
+{
+    size_t i = 0;
+    size_t total_size = 1;
+    for (i = 0; i < shape->ndim; ++i) {
+        total_size *= shape->data[i];
+    }
+    return total_size;
+}
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-    size_t *reduced_shape,
     size_t shape_ndim,
     size_t *shape,
     size_t axes_ndim,
     int *axes,
-    char keepdims)
-{
+    char keepdims)
+{
+    cumo_cuda_cudnn_shape_t reduced_shape{};
     size_t i_axis = 0;
-
+    assert(shape_ndim >= axes_ndim);
     for (size_t i = 0; i < shape_ndim; ++i) {
         if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
             ++i_axis;
             if (keepdims) {
-                reduced_shape[
+                reduced_shape.data[reduced_shape.ndim++] = 1;
             }
         } else {
-            reduced_shape[
+            reduced_shape.data[reduced_shape.ndim++] = shape[i];
         }
     }
     assert(i_axis == axes_ndim);
-    assert(
-    return
+    assert(reduced_shape.ndim == shape_ndim - static_cast<int8_t>(!keepdims) * axes_ndim);
+    return reduced_shape;
 }
 
 #if defined(__cplusplus)
data/ext/cumo/include/cumo/cuda/cudnn.h
CHANGED
@@ -203,9 +203,17 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
     cudnnTensorDescriptor_t x_desc,
     cudnnBatchNormMode_t mode);
 
+
+typedef struct {
+    size_t ndim;
+    size_t data[CUMO_NA_MAX_DIMENSION];
+} cumo_cuda_cudnn_shape_t;
+
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape);
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-    size_t *reduced_shape,
     size_t shape_ndim,
     size_t *shape,
     size_t axes_ndim,
data/ext/cumo/narray/gen/tmpl/batch_norm.c
CHANGED
@@ -20,8 +20,8 @@ static VALUE
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype
-    dtype
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, beta, running_mean, running_var, eps, decay, axis, mean, inv_std, y;
     VALUE kw_hash = Qnil;
@@ -37,8 +37,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx;
-    size_t *x_shape;
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
    VALUE x_cont, gamma_cont, beta_cont;
@@ -92,24 +92,35 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nrunning_mean, *nrunning_var, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CumoGetNArray(beta, nbeta);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        if (running_mean != Qnil) {
+            CumoGetNArray(running_mean, nrunning_mean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_mean->size, reduced_total_size);
+        }
+        if (running_var != Qnil) {
+            CumoGetNArray(running_var, nrunning_var);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_var->size, reduced_total_size);
+        }
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -156,8 +167,8 @@ static VALUE
     status = cudnnBatchNormalizationForwardTraining(
             handle,
             mode,
-            (void*)&
-            (void*)&
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c
CHANGED
@@ -13,15 +13,15 @@
 end
 %>
 
-// gx, ggamma, gbeta = x.
+// gx, ggamma, gbeta = x.batch_norm_backward(gamma, gy, mean:, inv_std:, eps:, axis:)
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
 {
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype
-    dtype
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, gy, mean, inv_std, eps, axis, gx, ggamma, gbeta;
     VALUE kw_hash = Qnil;
@@ -36,9 +36,9 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx, *ngamma;
-    size_t *x_shape, *gamma_shape;
-    size_t x_ndim, gamma_ndim;
+    cumo_narray_t *nx, *ngamma;
+    size_t *x_shape, *gamma_shape;
+    size_t x_ndim, gamma_ndim;
 
     VALUE x_cont, gamma_cont, gy_cont;
     cudnnTensorDescriptor_t x_desc = 0;
@@ -79,27 +79,29 @@ static VALUE
 
     CumoGetNArray(x, nx);
     CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(gy, ngy);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
     gamma_ndim = ngamma->ndim;
     gamma_shape = ngamma->shape;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    {
+        cumo_narray_t *ngy, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gy, ngy);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nx->size, ngy->size);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -142,10 +144,10 @@ static VALUE
     status = cudnnBatchNormalizationBackward(
             handle,
             mode,
-            (void*)&
-            (void*)&
-            (void*)&
-            (void*)&
+            (void*)&coef_one,
+            (void*)&coef_zero,
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
data/ext/cumo/narray/gen/tmpl/cond_unary.c
CHANGED
@@ -13,6 +13,7 @@ static void
     CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
     CUMO_INIT_PTR_BIT(lp, 1, a2, p2, s2);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     if (idx1) {
         for (; i--;) {
             CUMO_GET_DATA_INDEX(p1,idx1,dtype,x);
data/ext/cumo/narray/gen/tmpl/cum.c
CHANGED
@@ -12,10 +12,12 @@ static void
     CUMO_INIT_PTR(lp, 1, p2, s2);
     //printf("i=%lu p1=%lx s1=%lu p2=%lx s2=%lu\n",i,(size_t)p1,s1,(size_t)p2,s2);
 
+    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
+
     CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
     CUMO_SET_DATA_STRIDE(p2,s2,dtype,x);
     //printf("i=%lu x=%f\n",i,x);
-    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
     for (i--; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,y);
         m_<%=name%><%=j%>(x,y);
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c
CHANGED
@@ -32,8 +32,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx;
-    size_t *x_shape;
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont, mean_cont, var_cont;
@@ -62,24 +62,24 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nmean, *nvar;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CumoGetNArray(beta, nbeta);
+        CumoGetNArray(mean, nmean);
+        CumoGetNArray(var, nvar);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nvar->size, reduced_total_size);
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
data/ext/cumo/narray/index.c
CHANGED
@@ -857,6 +857,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = cumo_na_range_check(idx[i], na->shape[i], i);
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[x];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*x;
@@ -872,6 +874,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = x / s;
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[m];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*m;
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cumo
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-
+date: 2019-06-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray