cumo 0.4.1 → 0.4.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 784e23c2ba8be26ed4a9834766f4a410b6a73fe2b828a696b3f8107bdd187861
-  data.tar.gz: 9ff0de0b005907b070868cf40548b3b6c883459898e83b252784a7e30dc77058
+  metadata.gz: 4886418f8fc62df52cc7b235e755e2d7733221b39d46f2f68e25a1ee49923487
+  data.tar.gz: 7476161b95f04d560397d6d2d6abf64bfbf9987a10559ea90e66fa12275075bb
 SHA512:
-  metadata.gz: 6cb2f338da7f52501e0d3d6716990a9ddc4165b2aa564b5e22afe98526a151a25474f27081850719fc00e49c0407859af8470070666de0653e4cd74bf0155866
-  data.tar.gz: 3a4362323eb239376644bfb49857afce658d79f8e7ec786e993502592387caa281c9a626fae01b374f28ab6ae8dcb4bc435c7fe73f97728b3200b9f39409b357
+  metadata.gz: 543fc92511dc863926f970ff23d811fc7975d182b74f11053ace504e68a6fd70b607c132a46bdf6f9396131560885d92a91a8c91544e0524d6a3cad12b3cdb64
+  data.tar.gz: 610458caead3fef458aba25384ad817ff27e727f01d3452d165332163b93c64b14934b7d9546c942fc60920d63e6fb5e745180c1c69ad6ee0d2f96f3d8a9887b
@@ -1,3 +1,11 @@
+# 0.4.2 (2019-06-11)
+
+Fixes:
+
+* cond_unary.c: add cudaDeviceSynchronize to avoid bus error
+* index.c: add cudaDeviceSynchronize to avoid bus error
+* cum.c: add cudaDeviceSynchronize to avoid bus error
+
 # 0.4.1 (2019-05-06)
 
 Fixes:
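
All three fixes apply the same pattern: a cudaDeviceSynchronize() is inserted before host code reads memory that a still-in-flight kernel may be writing, which is a data race and can surface as a bus error (SIGBUS) on some systems. A minimal, self-contained illustration of the failure mode and the fix (illustrative CUDA C, not cumo code):

    #include <stdio.h>
    #include <cuda_runtime.h>

    __global__ void fill(float *a, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) a[i] = 1.0f;
    }

    int main(void)
    {
        int n = 1 << 20;
        float *a;
        cudaMallocManaged(&a, n * sizeof(float));  /* unified (managed) memory */
        fill<<<(n + 255) / 256, 256>>>(a, n);      /* kernel launch is async */
        /* Without this synchronize, the host read below races with the
         * kernel and can bus-error on some systems. */
        cudaDeviceSynchronize();
        printf("%f\n", a[0]);                      /* host-side read */
        cudaFree(a);
        return 0;
    }

As the hunks below show, cumo wraps the call as cumo_cuda_runtime_check_status(cudaDeviceSynchronize()) so that a runtime error is raised rather than silently ignored.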
@@ -540,29 +540,40 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
 }
 
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape)
+{
+    size_t i = 0;
+    size_t total_size = 1;
+    for (i = 0; i < shape->ndim; ++i) {
+        total_size *= shape->data[i];
+    }
+    return total_size;
+}
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-        size_t *reduced_shape,
         size_t shape_ndim,
         size_t *shape,
         size_t axes_ndim,
         int *axes,
-        char keepdims) {
-    assert(shape_ndim >= axes_ndim);
+        char keepdims)
+{
+    cumo_cuda_cudnn_shape_t reduced_shape{};
     size_t i_axis = 0;
-    size_t i_shape = 0;
+    assert(shape_ndim >= axes_ndim);
     for (size_t i = 0; i < shape_ndim; ++i) {
         if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
             ++i_axis;
             if (keepdims) {
-                reduced_shape[i_shape++] = 1;
+                reduced_shape.data[reduced_shape.ndim++] = 1;
             }
         } else {
-            reduced_shape[i_shape++] = shape[i];
+            reduced_shape.data[reduced_shape.ndim++] = shape[i];
         }
     }
     assert(i_axis == axes_ndim);
-    assert(i_shape == shape_ndim - static_cast<int8_t>(!keepdims) * axes_ndim);
-    return i_shape;
+    assert(reduced_shape.ndim == shape_ndim - static_cast<int8_t>(!keepdims) * axes_ndim);
+    return reduced_shape;
 }
 
 #if defined(__cplusplus)
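
As context for the hunks that follow, a minimal sketch of the reworked shape API in use; the declarations come from the header change below, while the concrete shape and axis values are invented for illustration:

    size_t shape[3] = {32, 64, 128};  /* hypothetical (N, C, L) array */
    int axes[2] = {0, 2};             /* reduce over axes 0 and 2 */

    /* keepdims != 0 keeps reduced axes as size-1 dims: {1, 64, 1} */
    cumo_cuda_cudnn_shape_t reduced =
        cumo_cuda_cudnn_ReduceShape(3, shape, 2, axes, 1);

    /* product of the dims: 1 * 64 * 1 == 64 */
    size_t total = cumo_cuda_cudnn_GetTotalSize(&reduced);

Returning the fixed-capacity struct by value replaces the old caller-allocated reduced_shape output array, which is what lets the batch-norm callers below turn their commented-out TODO bookkeeping into real size checks.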
@@ -10,8 +10,8 @@ extern "C" {
 #endif
 #endif
 
-#define CUMO_VERSION "0.4.1"
-#define CUMO_VERSION_CODE 41
+#define CUMO_VERSION "0.4.2"
+#define CUMO_VERSION_CODE 42
 
 bool cumo_compatible_mode_enabled_p();
 bool cumo_show_warning_enabled_p();
@@ -203,9 +203,17 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
         cudnnTensorDescriptor_t x_desc,
         cudnnBatchNormMode_t mode);
 
+
+typedef struct {
+    size_t ndim;
+    size_t data[CUMO_NA_MAX_DIMENSION];
+} cumo_cuda_cudnn_shape_t;
+
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape);
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-        size_t *reduced_shape,
         size_t shape_ndim,
         size_t *shape,
         size_t axes_ndim,
@@ -20,8 +20,8 @@ static VALUE
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype coef_alpha = 1;
-    dtype coef_beta = 0;
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, beta, running_mean, running_var, eps, decay, axis, mean, inv_std, y;
     VALUE kw_hash = Qnil;
@@ -37,8 +37,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx; // , *ngamma, *nbeta;
-    size_t *x_shape; // *gamma_shape, *beta_shape, reduced_shape[CUMO_NA_MAX_DIMENSION];
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont;
@@ -92,24 +92,35 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-    // gamma_ndim = ngamma->ndim;
-    // gamma_shape = ngamma->shape;
-    // beta_ndim = nbeta->ndim;
-    // beta_shape = nbeta->shape;
-
-    // TODO: Size check of gammma, beta, running_mean, running_var, mean, inv_std
-    // are equivalent with either of reduced_shape(keepdims: false) or reduced_shape(keepdims: true)
-    // reduced_ndim = cumo_cuda_cudnn_ReduceShape(reduced_shape, x_ndim, x_shape, axis_ndim, int_axis, 1);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, gamma_ndim);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, beta_ndim);
-    // for (size_t idim = 0; idim < reduced_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], gamma_shape[idim]);
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], beta_shape[idim]);
-    // }
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nrunning_mean, *nrunning_var, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CumoGetNArray(beta, nbeta);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        if (running_mean != Qnil) {
+            CumoGetNArray(running_mean, nrunning_mean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_mean->size, reduced_total_size);
+        }
+        if (running_var != Qnil) {
+            CumoGetNArray(running_var, nrunning_var);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_var->size, reduced_total_size);
+        }
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -156,8 +167,8 @@ static VALUE
     status = cudnnBatchNormalizationForwardTraining(
             handle,
             mode,
-            (void*)&coef_alpha,
-            (void*)&coef_beta,
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
@@ -13,15 +13,15 @@
     end
 %>
 
-// gx, ggamma, gbeta = x.batch_normalizatoin_backward(gamma, gy, mean:, inv_std:, eps:, axis:)
+// gx, ggamma, gbeta = x.batch_norm_backward(gamma, gy, mean:, inv_std:, eps:, axis:)
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
 {
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype coef_alpha = 1;
-    dtype coef_beta = 0;
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, gy, mean, inv_std, eps, axis, gx, ggamma, gbeta;
     VALUE kw_hash = Qnil;
@@ -36,9 +36,9 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx, *ngamma; // , *ngy;
-    size_t *x_shape, *gamma_shape; // , *gy_shape, reduced_shape[CUMO_NA_MAX_DIMENSION];
-    size_t x_ndim, gamma_ndim; // , gy_ndim, reduced_ndim;
+    cumo_narray_t *nx, *ngamma;
+    size_t *x_shape, *gamma_shape;
+    size_t x_ndim, gamma_ndim;
 
     VALUE x_cont, gamma_cont, gy_cont;
     cudnnTensorDescriptor_t x_desc = 0;
@@ -79,27 +79,29 @@ static VALUE
 
     CumoGetNArray(x, nx);
     CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(gy, ngy);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
     gamma_ndim = ngamma->ndim;
     gamma_shape = ngamma->shape;
-    // gy_ndim = ngy->ndim;
-    // gy_shape = ngy->shape;
-
-    // TODO: Size check of gammma, beta, running_mean, running_var, mean, inv_std
-    // are equivalent with either of reduced_shape(keepdims: false) or reduced_shape(keepdims: true)
-    // reduced_ndim = cumo_cuda_cudnn_ReduceShape(reduced_shape, x_ndim, x_shape, axis_ndim, int_axis, 1);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, gamma_ndim);
-    // for (size_t idim = 0; idim < reduced_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], gamma_shape[idim]);
-    // }
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(x_ndim, gy_ndim);
-    // for (size_t idim = 0; idim < x_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(x_shape[idim], gy_shape[idim]);
-    // }
-
-    // TODO: Add ndim and shape (same with reduced) for mean and inv_std if given
+
+    {
+        cumo_narray_t *ngy, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gy, ngy);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nx->size, ngy->size);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -142,10 +144,10 @@ static VALUE
     status = cudnnBatchNormalizationBackward(
             handle,
             mode,
-            (void*)&coef_alpha,
-            (void*)&coef_beta,
-            (void*)&coef_alpha,
-            (void*)&coef_beta,
+            (void*)&coef_one,
+            (void*)&coef_zero,
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
@@ -13,6 +13,7 @@ static void
     CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
     CUMO_INIT_PTR_BIT(lp, 1, a2, p2, s2);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     if (idx1) {
         for (; i--;) {
             CUMO_GET_DATA_INDEX(p1,idx1,dtype,x);
@@ -12,10 +12,12 @@ static void
     CUMO_INIT_PTR(lp, 1, p2, s2);
     //printf("i=%lu p1=%lx s1=%lu p2=%lx s2=%lu\n",i,(size_t)p1,s1,(size_t)p2,s2);
 
+    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
+
     CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
     CUMO_SET_DATA_STRIDE(p2,s2,dtype,x);
     //printf("i=%lu x=%f\n",i,x);
-    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
     for (i--; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,y);
         m_<%=name%><%=j%>(x,y);
@@ -32,8 +32,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx; // , *ngamma, *nbeta;
-    size_t *x_shape; // *gamma_shape, *beta_shape, reduced_shape[CUMO_NA_MAX_DIMENSION];
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont, mean_cont, var_cont;
@@ -62,24 +62,24 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-    // gamma_ndim = ngamma->ndim;
-    // gamma_shape = ngamma->shape;
-    // beta_ndim = nbeta->ndim;
-    // beta_shape = nbeta->shape;
-
-    // TODO: Size check of gammma, beta, running_mean, running_var, mean, inv_std
-    // are equivalent with either of reduced_shape(keepdims: false) or reduced_shape(keepdims: true)
-    // reduced_ndim = cumo_cuda_cudnn_ReduceShape(reduced_shape, x_ndim, x_shape, axis_ndim, int_axis, 1);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, gamma_ndim);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, beta_ndim);
-    // for (size_t idim = 0; idim < reduced_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], gamma_shape[idim]);
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], beta_shape[idim]);
-    // }
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nmean, *nvar;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CumoGetNArray(beta, nbeta);
+        CumoGetNArray(mean, nmean);
+        CumoGetNArray(var, nvar);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nvar->size, reduced_total_size);
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -857,6 +857,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = cumo_na_range_check(idx[i], na->shape[i], i);
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[x];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*x;
@@ -872,6 +874,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = x / s;
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[m];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*m;
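
Both hunks guard a host-side dereference: CUMO_SDX_GET_INDEX(sdx) yields a pointer into index data that an earlier kernel may still be writing, so the CPU read of [x] or [m] must wait for the device. Sketched in isolation, the added guard amounts to something like the following (sync_before_host_read is a hypothetical stand-in for the cumo_cuda_runtime_check_status(cudaDeviceSynchronize()) combination used above):

    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda_runtime.h>

    /* Synchronize, then fail loudly if the runtime reported an error,
     * before any host-side read of device-written memory. */
    static void sync_before_host_read(void)
    {
        cudaError_t status = cudaDeviceSynchronize();
        if (status != cudaSuccess) {
            fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
            abort();
        }
    }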
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cumo
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-05-06 00:00:00.000000000 Z
+date: 2019-06-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray