cumo 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/cumo/cuda/cudnn_impl.cpp +19 -8
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cudnn.h +9 -1
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +33 -22
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +29 -27
- data/ext/cumo/narray/gen/tmpl/cond_unary.c +1 -0
- data/ext/cumo/narray/gen/tmpl/cum.c +3 -1
- data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c +18 -18
- data/ext/cumo/narray/index.c +4 -0
- metadata +2 -2
checksums.yaml CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4886418f8fc62df52cc7b235e755e2d7733221b39d46f2f68e25a1ee49923487
+  data.tar.gz: 7476161b95f04d560397d6d2d6abf64bfbf9987a10559ea90e66fa12275075bb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 543fc92511dc863926f970ff23d811fc7975d182b74f11053ace504e68a6fd70b607c132a46bdf6f9396131560885d92a91a8c91544e0524d6a3cad12b3cdb64
+  data.tar.gz: 610458caead3fef458aba25384ad817ff27e727f01d3452d165332163b93c64b14934b7d9546c942fc60920d63e6fb5e745180c1c69ad6ee0d2f96f3d8a9887b
```
data/CHANGELOG.md CHANGED

data/ext/cumo/cuda/cudnn_impl.cpp CHANGED

```diff
@@ -540,29 +540,40 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
 }
 
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape)
+{
+    size_t i = 0;
+    size_t total_size = 1;
+    for (i = 0; i < shape->ndim; ++i) {
+        total_size *= shape->data[i];
+    }
+    return total_size;
+}
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-    size_t *reduced_shape,
     size_t shape_ndim,
     size_t *shape,
     size_t axes_ndim,
     int *axes,
-    char keepdims)
-
+    char keepdims)
+{
+    cumo_cuda_cudnn_shape_t reduced_shape{};
     size_t i_axis = 0;
-
+    assert(shape_ndim >= axes_ndim);
     for (size_t i = 0; i < shape_ndim; ++i) {
         if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
             ++i_axis;
             if (keepdims) {
-                reduced_shape[
+                reduced_shape.data[reduced_shape.ndim++] = 1;
             }
         } else {
-            reduced_shape[
+            reduced_shape.data[reduced_shape.ndim++] = shape[i];
         }
     }
     assert(i_axis == axes_ndim);
-    assert(
-    return
+    assert(reduced_shape.ndim == shape_ndim - static_cast<int8_t>(!keepdims) * axes_ndim);
+    return reduced_shape;
 }
 
 #if defined(__cplusplus)
```
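The new `cumo_cuda_cudnn_GetTotalSize`/`cumo_cuda_cudnn_ReduceShape` pair replaces the old out-parameter API: the reduced shape now comes back by value together with its `ndim`, and its element count is simply the product of its extents. Below is a minimal standalone C sketch of the same logic, with `CUMO_NA_MAX_DIMENSION` stubbed and names simplified; the axes list is assumed sorted ascending, which the assertions also rely on:

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* CUMO_NA_MAX_DIMENSION is defined in cumo's headers; 12 is a stand-in here. */
#define CUMO_NA_MAX_DIMENSION 12

typedef struct {
    size_t ndim;
    size_t data[CUMO_NA_MAX_DIMENSION];
} shape_t;

/* Mirrors cumo_cuda_cudnn_ReduceShape: drop each reduced axis, or keep it
 * as extent 1 when keepdims is set. */
static shape_t reduce_shape(size_t shape_ndim, const size_t *shape,
                            size_t axes_ndim, const int *axes, char keepdims)
{
    shape_t reduced = {0};
    size_t i_axis = 0;
    assert(shape_ndim >= axes_ndim);
    for (size_t i = 0; i < shape_ndim; ++i) {
        if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
            ++i_axis;
            if (keepdims) {
                reduced.data[reduced.ndim++] = 1;
            }
        } else {
            reduced.data[reduced.ndim++] = shape[i];
        }
    }
    return reduced;
}

int main(void)
{
    /* Batch-norm case: reduce an NCHW input over every axis except channels. */
    size_t shape[] = {32, 3, 28, 28};
    int axes[] = {0, 2, 3};
    shape_t r = reduce_shape(4, shape, 3, axes, 1);
    size_t total = 1;
    for (size_t i = 0; i < r.ndim; ++i) total *= r.data[i];
    printf("reduced ndim=%zu total=%zu\n", r.ndim, total); /* ndim=4 total=3 */
    return 0;
}
```

Returning the fixed-size struct by value trades a few copied bytes for not having to preallocate a buffer and thread a `size_t *` through every caller.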
data/ext/cumo/include/cumo.h CHANGED

data/ext/cumo/include/cumo/cuda/cudnn.h CHANGED

```diff
@@ -203,9 +203,17 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
     cudnnTensorDescriptor_t x_desc,
     cudnnBatchNormMode_t mode);
 
+
+typedef struct {
+    size_t ndim;
+    size_t data[CUMO_NA_MAX_DIMENSION];
+} cumo_cuda_cudnn_shape_t;
+
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape);
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-    size_t *reduced_shape,
     size_t shape_ndim,
     size_t *shape,
     size_t axes_ndim,
```
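Header side of the same change: `cumo_cuda_cudnn_shape_t` is bounded at `CUMO_NA_MAX_DIMENSION` entries so it can live on the stack, and `ReduceShape` loses its `size_t *reduced_shape` out-parameter. A hypothetical caller fragment, assuming this header is included (the numbers are purely illustrative):

```c
/* Reduce an NCHW shape over every axis but channels, keeping dims. */
size_t shape[] = {32, 3, 28, 28};
int axes[] = {0, 2, 3};
cumo_cuda_cudnn_shape_t reduced =
    cumo_cuda_cudnn_ReduceShape(4, shape, 3, axes, 1 /* keepdims */);
size_t n = cumo_cuda_cudnn_GetTotalSize(&reduced); /* 3, one per channel */
```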
data/ext/cumo/narray/gen/tmpl/batch_norm.c CHANGED

```diff
@@ -20,8 +20,8 @@ static VALUE
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype
-    dtype
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, beta, running_mean, running_var, eps, decay, axis, mean, inv_std, y;
     VALUE kw_hash = Qnil;
@@ -37,8 +37,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx;
-    size_t *x_shape;
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont;
@@ -92,24 +92,35 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nrunning_mean, *nrunning_var, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CumoGetNArray(beta, nbeta);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        if (running_mean != Qnil) {
+            CumoGetNArray(running_mean, nrunning_mean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_mean->size, reduced_total_size);
+        }
+        if (running_var != Qnil) {
+            CumoGetNArray(running_var, nrunning_var);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_var->size, reduced_total_size);
+        }
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -156,8 +167,8 @@ static VALUE
     status = cudnnBatchNormalizationForwardTraining(
             handle,
             mode,
-            (void*)&
-            (void*)&
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
```
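The commented-out `CumoGetNArray` calls become a real validation block: `gamma`, `beta`, and each optional statistic must contain exactly `reduced_total_size` elements (one per channel in the NCHW case), so a mismatched argument now fails with a clear size error instead of an opaque cuDNN failure. `CUMO_CUDA_CUDNN_CHECK_SIZE_EQ` presumably reports through Ruby's exception machinery; a hypothetical plain-C stand-in for the same guard:

```c
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for CUMO_CUDA_CUDNN_CHECK_SIZE_EQ; the real macro
 * raises a Ruby exception rather than exiting the process. */
static void check_size_eq(size_t actual, size_t expected, const char *name)
{
    if (actual != expected) {
        fprintf(stderr, "%s: size %zu, expected %zu\n", name, actual, expected);
        exit(EXIT_FAILURE);
    }
}

/* e.g. check_size_eq(ngamma_size, reduced_total_size, "gamma"); */
```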
data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c CHANGED

```diff
@@ -13,15 +13,15 @@
   end
 %>
 
-// gx, ggamma, gbeta = x.
+// gx, ggamma, gbeta = x.batch_norm_backward(gamma, gy, mean:, inv_std:, eps:, axis:)
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
 {
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype
-    dtype
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, gy, mean, inv_std, eps, axis, gx, ggamma, gbeta;
     VALUE kw_hash = Qnil;
@@ -36,9 +36,9 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx, *ngamma;
-    size_t *x_shape, *gamma_shape;
-    size_t x_ndim, gamma_ndim;
+    cumo_narray_t *nx, *ngamma;
+    size_t *x_shape, *gamma_shape;
+    size_t x_ndim, gamma_ndim;
 
     VALUE x_cont, gamma_cont, gy_cont;
     cudnnTensorDescriptor_t x_desc = 0;
@@ -79,27 +79,29 @@ static VALUE
 
     CumoGetNArray(x, nx);
     CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(gy, ngy);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
     gamma_ndim = ngamma->ndim;
     gamma_shape = ngamma->shape;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    {
+        cumo_narray_t *ngy, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gy, ngy);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nx->size, ngy->size);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -142,10 +144,10 @@ static VALUE
     status = cudnnBatchNormalizationBackward(
             handle,
             mode,
-            (void*)&
-            (void*)&
-            (void*)&
-            (void*)&
+            (void*)&coef_one,
+            (void*)&coef_zero,
+            (void*)&coef_one,
+            (void*)&coef_zero,
            x_desc,
            x_cont_ptr,
            x_desc,
```
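The `coef_one`/`coef_zero` names document cuDNN's blending convention: outputs are computed as `dst = alpha * result + beta * dst`, so a 1/0 pair plainly overwrites the destination. `cudnnBatchNormalizationBackward` takes two such pairs, one scaling the data gradient and one scaling the parameter gradients (`alphaDataDiff`/`betaDataDiff` and `alphaParamDiff`/`betaParamDiff` in cuDNN's documentation), which is why the pattern appears twice in the call. A scalar illustration of the convention:

```c
/* Scalar model of cuDNN's alpha/beta output blending (not the real API). */
static double blend(double alpha, double result, double beta, double dst)
{
    return alpha * result + beta * dst;
}
/* blend(1.0, r, 0.0, anything) == r: coef_one/coef_zero mean "just write r". */
```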
data/ext/cumo/narray/gen/tmpl/cond_unary.c CHANGED

```diff
@@ -13,6 +13,7 @@ static void
     CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
     CUMO_INIT_PTR_BIT(lp, 1, a2, p2, s2);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     if (idx1) {
         for (; i--;) {
             CUMO_GET_DATA_INDEX(p1,idx1,dtype,x);
```
data/ext/cumo/narray/gen/tmpl/cum.c CHANGED

```diff
@@ -12,10 +12,12 @@ static void
     CUMO_INIT_PTR(lp, 1, p2, s2);
     //printf("i=%lu p1=%lx s1=%lu p2=%lx s2=%lu\n",i,(size_t)p1,s1,(size_t)p2,s2);
 
+    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
+
     CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
     CUMO_SET_DATA_STRIDE(p2,s2,dtype,x);
     //printf("i=%lu x=%f\n",i,x);
-    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
     for (i--; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,y);
         m_<%=name%><%=j%>(x,y);
```
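Both template changes address the same hazard: these loops run on the CPU but walk arrays that live in GPU memory, so any kernels still in flight must complete before the host dereferences those pointers (`cum.c` additionally hoists the one-time FIXME warning above the first element read). A sketch of the pattern in CUDA C, assuming managed memory so the host may legally touch the buffer after synchronizing; `check_status` is a hypothetical stand-in for `cumo_cuda_runtime_check_status`:

```cuda
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical error check mirroring cumo_cuda_runtime_check_status. */
static void check_status(cudaError_t st)
{
    if (st != cudaSuccess) {
        fprintf(stderr, "CUDA: %s\n", cudaGetErrorString(st));
        exit(EXIT_FAILURE);
    }
}

__global__ void fill(float *p, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) p[i] = (float)i;
}

int main(void)
{
    int n = 1024;
    float *p = NULL;
    check_status(cudaMallocManaged(&p, n * sizeof(float)));
    fill<<<(n + 255) / 256, 256>>>(p, n);
    /* Without this barrier the host loop below races the kernel. */
    check_status(cudaDeviceSynchronize());
    float sum = 0.0f;
    for (int i = 0; i < n; ++i) sum += p[i]; /* host-side read, as in the templates */
    printf("sum=%f\n", sum);
    check_status(cudaFree(p));
    return 0;
}
```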
data/ext/cumo/narray/gen/tmpl/fixed_batch_norm.c CHANGED

```diff
@@ -32,8 +32,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx;
-    size_t *x_shape;
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont, mean_cont, var_cont;
@@ -62,24 +62,24 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nmean, *nvar;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CumoGetNArray(beta, nbeta);
+        CumoGetNArray(mean, nmean);
+        CumoGetNArray(var, nvar);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nvar->size, reduced_total_size);
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
```
data/ext/cumo/narray/index.c CHANGED

```diff
@@ -857,6 +857,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = cumo_na_range_check(idx[i], na->shape[i], i);
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[x];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*x;
@@ -872,6 +874,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
             x = x / s;
             sdx = nv->stridx[i];
             if (CUMO_SDX_IS_INDEX(sdx)) {
+                CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+                cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
                 pos += CUMO_SDX_GET_INDEX(sdx)[m];
             } else {
                 pos += CUMO_SDX_GET_STRIDE(sdx)*m;
```
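`cumo_na_get_result_dimension` folds a multi-dimension index into one flat offset; when a dimension is backed by an explicit index array (`CUMO_SDX_IS_INDEX`), that array is device memory, so the host must synchronize before the `CUMO_SDX_GET_INDEX(sdx)[x]` read, exactly as in the templates above. A hypothetical, simplified model of the stride-or-index lookup (names and types are not cumo's):

```c
#include <stddef.h>

/* Each dimension resolves either through an explicit index array or a stride. */
typedef struct {
    const size_t *index; /* non-NULL: index array (device memory in cumo) */
    ptrdiff_t stride;    /* used when index is NULL */
} stride_or_index_t;

static size_t flat_position(const stride_or_index_t *sdx,
                            const size_t *idx, size_t ndim)
{
    size_t pos = 0;
    for (size_t i = 0; i < ndim; ++i) {
        if (sdx[i].index != NULL) {
            /* cumo synchronizes the device right before this read */
            pos += sdx[i].index[idx[i]];
        } else {
            pos += (size_t)((ptrdiff_t)idx[i] * sdx[i].stride);
        }
    }
    return pos;
}
```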
metadata CHANGED

```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cumo
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-
+date: 2019-06-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
```