cumo 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 784e23c2ba8be26ed4a9834766f4a410b6a73fe2b828a696b3f8107bdd187861
-  data.tar.gz: 9ff0de0b005907b070868cf40548b3b6c883459898e83b252784a7e30dc77058
+  metadata.gz: 4886418f8fc62df52cc7b235e755e2d7733221b39d46f2f68e25a1ee49923487
+  data.tar.gz: 7476161b95f04d560397d6d2d6abf64bfbf9987a10559ea90e66fa12275075bb
 SHA512:
-  metadata.gz: 6cb2f338da7f52501e0d3d6716990a9ddc4165b2aa564b5e22afe98526a151a25474f27081850719fc00e49c0407859af8470070666de0653e4cd74bf0155866
-  data.tar.gz: 3a4362323eb239376644bfb49857afce658d79f8e7ec786e993502592387caa281c9a626fae01b374f28ab6ae8dcb4bc435c7fe73f97728b3200b9f39409b357
+  metadata.gz: 543fc92511dc863926f970ff23d811fc7975d182b74f11053ace504e68a6fd70b607c132a46bdf6f9396131560885d92a91a8c91544e0524d6a3cad12b3cdb64
+  data.tar.gz: 610458caead3fef458aba25384ad817ff27e727f01d3452d165332163b93c64b14934b7d9546c942fc60920d63e6fb5e745180c1c69ad6ee0d2f96f3d8a9887b
@@ -1,3 +1,11 @@
+# 0.4.2 (2019-01-11)
+
+Fixes:
+
+* cond_unary.c: add cudaDeviceSynchronize to avoid bus error
+* index.c: add cudaDeviceSynchronize to avoid bus error
+* cum.c: add cudaDeviceSynchronize to avoid bus error
+
 # 0.4.1 (2019-05-06)
 
 Fixes:
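
All three 0.4.2 fixes share one root cause: cumo launches kernels asynchronously into CUDA managed (unified) memory, and these code paths then read that memory from the host. If the device is still writing, the host access can fault with a bus error on some systems. The sketch below is a minimal standalone reproduction of the pattern, not cumo code; cumo itself wraps the call as cumo_cuda_runtime_check_status(cudaDeviceSynchronize()), as the hunks further down show.

// sketch.cu -- illustrative only: the host reads managed memory after an
// asynchronous kernel launch; cudaDeviceSynchronize() is the fix.
#include <cuda_runtime.h>
#include <stdio.h>

__global__ void fill(float *p, size_t n) {
    size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) p[i] = 1.0f;
}

int main(void) {
    const size_t n = 1 << 20;
    float *p;
    cudaMallocManaged((void **)&p, n * sizeof(*p));
    fill<<<(unsigned)((n + 255) / 256), 256>>>(p, n);  // returns immediately

    // Without this, reading p below races with the kernel and can SIGBUS.
    cudaError_t status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }

    printf("p[0] = %f\n", p[0]);  // host access, safe only after the sync
    cudaFree(p);
    return 0;
}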
@@ -540,29 +540,40 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
 }
 
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape)
+{
+    size_t i = 0;
+    size_t total_size = 1;
+    for (i = 0; i < shape->ndim; ++i) {
+        total_size *= shape->data[i];
+    }
+    return total_size;
+}
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-        size_t *reduced_shape,
         size_t shape_ndim,
         size_t *shape,
         size_t axes_ndim,
         int *axes,
-        char keepdims) {
-    assert(shape_ndim >= axes_ndim);
+        char keepdims)
+{
+    cumo_cuda_cudnn_shape_t reduced_shape{};
     size_t i_axis = 0;
-    size_t i_shape = 0;
+    assert(shape_ndim >= axes_ndim);
     for (size_t i = 0; i < shape_ndim; ++i) {
         if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
             ++i_axis;
             if (keepdims) {
-                reduced_shape[i_shape++] = 1;
+                reduced_shape.data[reduced_shape.ndim++] = 1;
             }
         } else {
-            reduced_shape[i_shape++] = shape[i];
+            reduced_shape.data[reduced_shape.ndim++] = shape[i];
         }
     }
     assert(i_axis == axes_ndim);
-    assert(i_shape == shape_ndim - static_cast<int8_t>(!keepdims) * axes_ndim);
-    return i_shape;
+    assert(reduced_shape.ndim == shape_ndim - static_cast<int8_t>(!keepdims) * axes_ndim);
+    return reduced_shape;
 }
 
 #if defined(__cplusplus)
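
This refactor changes the calling convention: instead of filling a caller-supplied size_t *reduced_shape and returning the new ndim, cumo_cuda_cudnn_ReduceShape now returns a small cumo_cuda_cudnn_shape_t by value, and the new cumo_cuda_cudnn_GetTotalSize gives its element count. A self-contained restatement of the logic, with simplified names and an assumed bound of 12 standing in for CUMO_NA_MAX_DIMENSION:

// reduce_shape_demo.c -- mirrors the behavior of the functions above.
#include <stdio.h>
#include <stddef.h>

#define MAX_DIM 12  /* stand-in for CUMO_NA_MAX_DIMENSION */

typedef struct {
    size_t ndim;
    size_t data[MAX_DIM];
} shape_t;

/* Drop each reduced axis, or keep it as length 1 when keepdims is set. */
static shape_t reduce_shape(size_t ndim, const size_t *shape,
                            size_t axes_ndim, const int *axes, int keepdims) {
    shape_t r = {0, {0}};
    size_t i_axis = 0;
    for (size_t i = 0; i < ndim; ++i) {
        if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
            ++i_axis;
            if (keepdims) r.data[r.ndim++] = 1;
        } else {
            r.data[r.ndim++] = shape[i];
        }
    }
    return r;
}

int main(void) {
    size_t shape[] = {2, 3, 4, 5};  /* e.g. an NCHW input to batch norm */
    int axes[] = {0, 2, 3};         /* reduce over N, H, W */
    shape_t r = reduce_shape(4, shape, 3, axes, 1);
    size_t total = 1;
    for (size_t i = 0; i < r.ndim; ++i) total *= r.data[i];
    printf("ndim=%zu total=%zu\n", r.ndim, total);  /* ndim=4 total=3 */
    return 0;
}

Returning the struct by value also removes the silent requirement that the caller's output buffer be at least CUMO_NA_MAX_DIMENSION entries long.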
@@ -10,8 +10,8 @@ extern "C" {
 #endif
 #endif
 
-#define CUMO_VERSION "0.4.1"
-#define CUMO_VERSION_CODE 41
+#define CUMO_VERSION "0.4.2"
+#define CUMO_VERSION_CODE 42
 
 bool cumo_compatible_mode_enabled_p();
 bool cumo_show_warning_enabled_p();
@@ -203,9 +203,17 @@ cumo_cuda_cudnn_CreateBNTensorDescriptor(
         cudnnTensorDescriptor_t x_desc,
         cudnnBatchNormMode_t mode);
 
+
+typedef struct {
+    size_t ndim;
+    size_t data[CUMO_NA_MAX_DIMENSION];
+} cumo_cuda_cudnn_shape_t;
+
 size_t
+cumo_cuda_cudnn_GetTotalSize(cumo_cuda_cudnn_shape_t *shape);
+
+cumo_cuda_cudnn_shape_t
 cumo_cuda_cudnn_ReduceShape(
-        size_t *reduced_shape,
         size_t shape_ndim,
         size_t *shape,
         size_t axes_ndim,
@@ -20,8 +20,8 @@ static VALUE
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype coef_alpha = 1;
-    dtype coef_beta = 0;
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, beta, running_mean, running_var, eps, decay, axis, mean, inv_std, y;
     VALUE kw_hash = Qnil;
@@ -37,8 +37,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx; // , *ngamma, *nbeta;
-    size_t *x_shape; // *gamma_shape, *beta_shape, reduced_shape[CUMO_NA_MAX_DIMENSION];
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont;
@@ -92,24 +92,35 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-    // gamma_ndim = ngamma->ndim;
-    // gamma_shape = ngamma->shape;
-    // beta_ndim = nbeta->ndim;
-    // beta_shape = nbeta->shape;
-
-    // TODO: Size check of gammma, beta, running_mean, running_var, mean, inv_std
-    // are equivalent with either of reduced_shape(keepdims: false) or reduced_shape(keepdims: true)
-    // reduced_ndim = cumo_cuda_cudnn_ReduceShape(reduced_shape, x_ndim, x_shape, axis_ndim, int_axis, 1);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, gamma_ndim);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, beta_ndim);
-    // for (size_t idim = 0; idim < reduced_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], gamma_shape[idim]);
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], beta_shape[idim]);
-    // }
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nrunning_mean, *nrunning_var, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CumoGetNArray(beta, nbeta);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        if (running_mean != Qnil) {
+            CumoGetNArray(running_mean, nrunning_mean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_mean->size, reduced_total_size);
+        }
+        if (running_var != Qnil) {
+            CumoGetNArray(running_var, nrunning_var);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nrunning_var->size, reduced_total_size);
+        }
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
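
This block turns the old TODO into actual validation, but compares total element counts rather than shapes dimension by dimension. That is deliberate: as the removed TODO notes, the parameters may arrive in either reduced form, and a size comparison accepts both, because keepdims only inserts length-1 axes. A small sketch of that equivalence (illustrative names, reusing the reduction rule above):

// size_check_demo.c -- why comparing element counts accepts both
// keepdims variants of a reduced shape.
#include <assert.h>
#include <stdio.h>
#include <stddef.h>

/* Element count of the reduced shape; keepdims-inserted axes have
 * length 1 and contribute nothing to the product. */
static size_t reduced_total(const size_t *shape, size_t ndim,
                            const int *axes, size_t axes_ndim, int keepdims) {
    (void)keepdims;  /* a kept axis multiplies the total by 1 */
    size_t total = 1, i_axis = 0;
    for (size_t i = 0; i < ndim; ++i) {
        if (i_axis < axes_ndim && i == (size_t)axes[i_axis]) {
            ++i_axis;            /* reduced axis: dropped, or kept as 1 */
        } else {
            total *= shape[i];   /* surviving axis */
        }
    }
    return total;
}

int main(void) {
    size_t shape[] = {2, 3, 4, 5};
    int axes[] = {0, 2, 3};
    /* keepdims=1 gives (1,3,1,1); keepdims=0 gives (3,); both hold 3 elements */
    assert(reduced_total(shape, 4, axes, 3, 1) ==
           reduced_total(shape, 4, axes, 3, 0));
    printf("reduced total = %zu\n", reduced_total(shape, 4, axes, 3, 1));
    return 0;
}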
@@ -156,8 +167,8 @@ static VALUE
     status = cudnnBatchNormalizationForwardTraining(
             handle,
             mode,
-            (void*)&coef_alpha,
-            (void*)&coef_beta,
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
@@ -13,15 +13,15 @@
 end
 %>
 
-// gx, ggamma, gbeta = x.batch_normalizatoin_backward(gamma, gy, mean:, inv_std:, eps:, axis:)
+// gx, ggamma, gbeta = x.batch_norm_backward(gamma, gy, mean:, inv_std:, eps:, axis:)
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self)
 {
     cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>;
     cudnnStatus_t status = 0;
     cudnnHandle_t handle = 0;
-    dtype coef_alpha = 1;
-    dtype coef_beta = 0;
+    dtype coef_one = 1;
+    dtype coef_zero = 0;
 
     VALUE x=self, gamma, gy, mean, inv_std, eps, axis, gx, ggamma, gbeta;
     VALUE kw_hash = Qnil;
@@ -36,9 +36,9 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef, Qundef, Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx, *ngamma; // , *ngy;
-    size_t *x_shape, *gamma_shape; // , *gy_shape, reduced_shape[CUMO_NA_MAX_DIMENSION];
-    size_t x_ndim, gamma_ndim; // , gy_ndim, reduced_ndim;
+    cumo_narray_t *nx, *ngamma;
+    size_t *x_shape, *gamma_shape;
+    size_t x_ndim, gamma_ndim;
 
     VALUE x_cont, gamma_cont, gy_cont;
     cudnnTensorDescriptor_t x_desc = 0;
@@ -79,27 +79,29 @@ static VALUE
 
     CumoGetNArray(x, nx);
     CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(gy, ngy);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
     gamma_ndim = ngamma->ndim;
     gamma_shape = ngamma->shape;
-    // gy_ndim = ngy->ndim;
-    // gy_shape = ngy->shape;
-
-    // TODO: Size check of gammma, beta, running_mean, running_var, mean, inv_std
-    // are equivalent with either of reduced_shape(keepdims: false) or reduced_shape(keepdims: true)
-    // reduced_ndim = cumo_cuda_cudnn_ReduceShape(reduced_shape, x_ndim, x_shape, axis_ndim, int_axis, 1);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, gamma_ndim);
-    // for (size_t idim = 0; idim < reduced_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], gamma_shape[idim]);
-    // }
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(x_ndim, gy_ndim);
-    // for (size_t idim = 0; idim < x_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(x_shape[idim], gy_shape[idim]);
-    // }
-
-    // TODO: Add ndim and shape (same with reduced) for mean and inv_std if given
+
+    {
+        cumo_narray_t *ngy, *nmean, *ninv_std;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gy, ngy);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nx->size, ngy->size);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        if (mean != Qnil) {
+            CumoGetNArray(mean, nmean);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        }
+        if (inv_std != Qnil) {
+            CumoGetNArray(inv_std, ninv_std);
+            CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ninv_std->size, reduced_total_size);
+        }
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -142,10 +144,10 @@ static VALUE
     status = cudnnBatchNormalizationBackward(
             handle,
             mode,
-            (void*)&coef_alpha,
-            (void*)&coef_beta,
-            (void*)&coef_alpha,
-            (void*)&coef_beta,
+            (void*)&coef_one,
+            (void*)&coef_zero,
+            (void*)&coef_one,
+            (void*)&coef_zero,
             x_desc,
             x_cont_ptr,
             x_desc,
@@ -13,6 +13,7 @@ static void
     CUMO_INIT_PTR_IDX(lp, 0, p1, s1, idx1);
     CUMO_INIT_PTR_BIT(lp, 1, a2, p2, s2);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     if (idx1) {
         for (; i--;) {
             CUMO_GET_DATA_INDEX(p1,idx1,dtype,x);
@@ -12,10 +12,12 @@ static void
     CUMO_INIT_PTR(lp, 1, p2, s2);
     //printf("i=%lu p1=%lx s1=%lu p2=%lx s2=%lu\n",i,(size_t)p1,s1,(size_t)p2,s2);
 
+    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
+
     CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
     CUMO_SET_DATA_STRIDE(p2,s2,dtype,x);
     //printf("i=%lu x=%f\n",i,x);
-    CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%><%=j%>", "<%=type_name%>");
     for (i--; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,y);
         m_<%=name%><%=j%>(x,y);
@@ -32,8 +32,8 @@ static VALUE
     };
     VALUE opts[] = {Qundef, Qundef, Qundef};
 
-    cumo_narray_t *nx; // , *ngamma, *nbeta;
-    size_t *x_shape; // *gamma_shape, *beta_shape, reduced_shape[CUMO_NA_MAX_DIMENSION];
+    cumo_narray_t *nx;
+    size_t *x_shape;
     size_t x_ndim;
 
     VALUE x_cont, gamma_cont, beta_cont, mean_cont, var_cont;
@@ -62,24 +62,24 @@ static VALUE
     }
 
     CumoGetNArray(x, nx);
-    // CumoGetNArray(gamma, ngamma);
-    // CumoGetNArray(beta, nbeta);
     x_ndim = nx->ndim;
     x_shape = nx->shape;
-    // gamma_ndim = ngamma->ndim;
-    // gamma_shape = ngamma->shape;
-    // beta_ndim = nbeta->ndim;
-    // beta_shape = nbeta->shape;
-
-    // TODO: Size check of gammma, beta, running_mean, running_var, mean, inv_std
-    // are equivalent with either of reduced_shape(keepdims: false) or reduced_shape(keepdims: true)
-    // reduced_ndim = cumo_cuda_cudnn_ReduceShape(reduced_shape, x_ndim, x_shape, axis_ndim, int_axis, 1);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, gamma_ndim);
-    // CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_ndim, beta_ndim);
-    // for (size_t idim = 0; idim < reduced_ndim; ++idim) {
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], gamma_shape[idim]);
-    //     CUMO_CUDA_CUDNN_CHECK_DIM_EQ(reduced_shape[idim], beta_shape[idim]);
-    // }
+
+    {
+        cumo_narray_t *ngamma, *nbeta, *nmean, *nvar;
+        cumo_cuda_cudnn_shape_t reduced_shape = cumo_cuda_cudnn_ReduceShape(x_ndim, x_shape, axis_ndim, int_axis, 1);
+        size_t reduced_total_size = cumo_cuda_cudnn_GetTotalSize(&reduced_shape);
+
+        CumoGetNArray(gamma, ngamma);
+        CumoGetNArray(beta, nbeta);
+        CumoGetNArray(mean, nmean);
+        CumoGetNArray(var, nvar);
+
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(ngamma->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nbeta->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nmean->size, reduced_total_size);
+        CUMO_CUDA_CUDNN_CHECK_SIZE_EQ(nvar->size, reduced_total_size);
+    }
 
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT);
     CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(gamma, cT);
@@ -857,6 +857,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = cumo_na_range_check(idx[i], na->shape[i], i);
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[x];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*x;
@@ -872,6 +874,8 @@ cumo_na_get_result_dimension(VALUE self, int argc, VALUE *argv, ssize_t stride,
         x = x / s;
         sdx = nv->stridx[i];
         if (CUMO_SDX_IS_INDEX(sdx)) {
+            CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("index", "cumo_na_get_result_dimension");
+            cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
             pos += CUMO_SDX_GET_INDEX(sdx)[m];
         } else {
             pos += CUMO_SDX_GET_STRIDE(sdx)*m;
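
cumo_na_get_result_dimension computes a flat element position on the host, mixing strided axes with "index" axes whose per-element offsets live in an array. In cumo that array can reside in GPU-managed memory, so the host dereference CUMO_SDX_GET_INDEX(sdx)[x] can race with an in-flight kernel unless the device is synchronized first; hence the two insertions above. Below is a simplified host-only model of the position computation (types and names are illustrative, not cumo's):

// stridx_demo.c -- flat position from mixed stride/index axes.
#include <stdio.h>
#include <stddef.h>

typedef struct {
    const size_t *index;  /* per-element offsets, or NULL for a strided axis */
    ptrdiff_t stride;     /* used when index == NULL */
} stridx_t;

static size_t element_pos(const stridx_t *sdx, const size_t *coords, size_t ndim) {
    size_t pos = 0;
    for (size_t i = 0; i < ndim; ++i) {
        if (sdx[i].index) {
            /* In cumo this array may live in managed memory: reading it
             * here is the host access that needed the synchronize. */
            pos += sdx[i].index[coords[i]];
        } else {
            pos += (size_t)(sdx[i].stride * (ptrdiff_t)coords[i]);
        }
    }
    return pos;
}

int main(void) {
    size_t idx0[] = {40, 0, 80};                     /* fancy index on axis 0 */
    stridx_t sdx[2] = { { idx0, 0 }, { NULL, 8 } };  /* axis 1 strided by 8 */
    size_t coords[2] = {2, 3};
    printf("pos = %zu\n", element_pos(sdx, coords, 2));  /* 80 + 24 = 104 */
    return 0;
}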
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cumo
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.4.2
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-05-06 00:00:00.000000000 Z
+date: 2019-06-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray