cumo 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e2ed7641113820552c74682336a8a7a92cce11e8897a4b6e0d9f9d416431242d
4
- data.tar.gz: b5e6a459c3bf096991587a40c7b1efeba3aa1b2004e37604d3adf5fc7dce68ee
3
+ metadata.gz: 8f07518545c0e4d72380382915462039151c132ee9827967e9d4c32604001f15
4
+ data.tar.gz: 94e5c261659e401d9a8016eb0a3493d9bce26abe19347602859f7a9fcc79014e
5
5
  SHA512:
6
- metadata.gz: 86cdf6a12b38ebb87c672cc0cb87ed7a51a2b37fc5fdf0b21d386ac4446afa102952da69e8799130abef8420b2bf6cd09836d6d14832b8ca2d4693f0718275e8
7
- data.tar.gz: 0f881077419ee9ca3c4b5b09fcf01e35be9884cca9d736745bdd45c5f8c3fab8b856a0c336f486ac12f26209d81f2128b8eefbcdbd3b240259ca21479dd13f41
6
+ metadata.gz: cf48d1e5452e2c871587da1f006b90f3de54c8dc911d4fa555bd54c7ea69741903ce000fd0b1fd38ba96fc46d65e79f24e3c9d858029df176cee7768e8de790b
7
+ data.tar.gz: 4c5d6bdc9ec211967458b9b4f3576a278b2d59011faf2ae5dd85979a4821496ad29db9eefd0fce9d0765f9ede38d42af2c8fc8dd250de41f9e22759ee9914a56
@@ -1,3 +1,11 @@
1
+ # 0.3.1 (2019-04-16)
2
+
3
+ Fixes:
4
+
5
+ * Fix batch_norm_backward
6
+ * Fix scalar.dot(scalar)
7
+ * Fix clip
8
+
1
9
  # 0.3.0 (2019-04-10)
2
10
 
3
11
  Enhancements:
@@ -10,8 +10,8 @@ extern "C" {
10
10
  #endif
11
11
  #endif
12
12
 
13
- #define CUMO_VERSION "0.3.0"
14
- #define CUMO_VERSION_CODE 30
13
+ #define CUMO_VERSION "0.3.1"
14
+ #define CUMO_VERSION_CODE 31
15
15
 
16
16
  bool cumo_compatible_mode_enabled_p();
17
17
  bool cumo_show_warning_enabled_p();
@@ -5,6 +5,7 @@
5
5
  #ifdef CUDNN_FOUND
6
6
  #include <cudnn.h>
7
7
  #endif // CUDNN_FOUND
8
+ #include "cumo/narray.h"
8
9
 
9
10
  #if defined(__cplusplus)
10
11
  extern "C" {
@@ -81,6 +82,24 @@ cumo_cuda_cudnn_get_int_ary(int* int_ary, VALUE ary, size_t ndim, int default_va
81
82
  }
82
83
  }
83
84
 
85
+ // VALUE is Ruby Array
86
+ static inline size_t
87
+ cumo_cuda_cudnn_get_int_axis(int* int_axis, VALUE axis)
88
+ {
89
+ size_t axis_ndim;
90
+ Check_Type(axis, T_ARRAY);
91
+ axis_ndim = (size_t)(RARRAY_LEN(axis));
92
+ if (CUMO_NA_MAX_DIMENSION <= axis_ndim) {
93
+ rb_raise(rb_eArgError, "Size of axis must be smaller than %d, but was %d",
94
+ (int)CUMO_NA_MAX_DIMENSION, (int)axis_ndim);
95
+ }
96
+ for (size_t idim = 0; idim < axis_ndim; ++idim) {
97
+ int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
98
+ }
99
+ // TODO: check axis is sorted
100
+ return axis_ndim;
101
+ }
102
+
84
103
  size_t
85
104
  cumo_cuda_cudnn_GetConvOutDim(
86
105
  size_t in_dim,
@@ -36,6 +36,10 @@ void <%="cumo_#{type_name}_mulsum#{nan}_reduce_kernel_launch"%>(char *p1, char *
36
36
  {
37
37
  ssize_t s1_idx = s1 / sizeof(dtype);
38
38
  ssize_t s2_idx = s2 / sizeof(dtype);
39
+ if (n == 1) { // when n == 1, s1 and s3 could be 0
40
+ s1_idx = 1;
41
+ s2_idx = 1;
42
+ }
39
43
  thrust::device_ptr<dtype> p1_begin = thrust::device_pointer_cast((dtype*)p1);
40
44
  thrust::device_ptr<dtype> p1_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
41
45
  thrust::device_ptr<dtype> p2_begin = thrust::device_pointer_cast((dtype*)p2);
@@ -88,12 +88,7 @@ static VALUE
88
88
  double_decay = NUM2DBL(decay);
89
89
  }
90
90
  if (axis != Qnil) {
91
- Check_Type(axis, T_ARRAY);
92
- axis_ndim = (size_t)(RARRAY_LEN(axis));
93
- for (size_t idim = 0; idim < axis_ndim; ++idim) {
94
- int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
95
- }
96
- // TODO: check axis is sorted
91
+ axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
97
92
  }
98
93
 
99
94
  CumoGetNArray(x, nx);
@@ -55,7 +55,7 @@ static VALUE
55
55
  size_t axis_ndim = 1;
56
56
 
57
57
  rb_scan_args(argc, argv, "2:", &gamma, &gy, &kw_hash);
58
- rb_get_kwargs(kw_hash, kw_table, 0, 8, opts);
58
+ rb_get_kwargs(kw_hash, kw_table, 0, 7, opts);
59
59
  mean = cumo_cuda_cudnn_option_value(opts[0], Qnil);
60
60
  inv_std = cumo_cuda_cudnn_option_value(opts[1], Qnil);
61
61
  eps = cumo_cuda_cudnn_option_value(opts[2], Qnil);
@@ -74,12 +74,7 @@ static VALUE
74
74
  double_eps = NUM2DBL(eps);
75
75
  }
76
76
  if (axis != Qnil) {
77
- Check_Type(axis, T_ARRAY);
78
- axis_ndim = (size_t)(RARRAY_LEN(axis));
79
- for (size_t idim = 0; idim < axis_ndim; ++idim) {
80
- int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
81
- }
82
- // TODO: check axis is sorted
77
+ axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
83
78
  }
84
79
 
85
80
  CumoGetNArray(x, nx);
@@ -135,11 +130,11 @@ static VALUE
135
130
  gbeta_ptr = cumo_na_get_offset_pointer_for_write(gbeta);
136
131
 
137
132
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
138
- if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
133
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
139
134
 
140
135
  mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
141
136
  status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
142
- if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
137
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
143
138
  // TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
144
139
 
145
140
  handle = cumo_cuda_cudnn_handle();
@@ -164,9 +159,9 @@ static VALUE
164
159
  double_eps,
165
160
  mean_ptr,
166
161
  inv_std_ptr);
167
- if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
162
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
168
163
 
169
- BATCH_NORM_ERROR:
164
+ BATCH_NORM_BACKWARD_ERROR:
170
165
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
171
166
  if (bn_desc) cudnnDestroyTensorDescriptor(bn_desc);
172
167
  cumo_cuda_cudnn_check_status(status);
@@ -11,6 +11,7 @@ static void
11
11
  CUMO_INIT_PTR(lp, 2, p3, s3);
12
12
  CUMO_INIT_PTR(lp, 3, p4, s4);
13
13
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
14
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
14
15
  for (; i--;) {
15
16
  CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
16
17
  CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
@@ -34,6 +35,7 @@ static void
34
35
  CUMO_INIT_PTR(lp, 1, p2, s2);
35
36
  CUMO_INIT_PTR(lp, 2, p3, s3);
36
37
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_min", "<%=type_name%>");
38
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
37
39
  for (; i--;) {
38
40
  CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
39
41
  CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
@@ -54,6 +56,7 @@ static void
54
56
  CUMO_INIT_PTR(lp, 1, p2, s2);
55
57
  CUMO_INIT_PTR(lp, 2, p3, s3);
56
58
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_max", "<%=type_name%>");
59
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
57
60
  for (; i--;) {
58
61
  CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
59
62
  CUMO_GET_DATA_STRIDE(p2,s2,dtype,max);
@@ -134,13 +134,13 @@ static VALUE
134
134
  y_ptr = cumo_na_get_offset_pointer_for_write(y);
135
135
 
136
136
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
137
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
137
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
138
138
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
139
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
139
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
140
140
  status = cumo_cuda_cudnn_CreateFilterDescriptor(&w_desc, w_cont, cudnn_dtype);
141
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
141
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
142
142
  status = cumo_cuda_cudnn_CreateConvolutionDescriptor(&conv_desc, ndim, int_stride, int_pad, cudnn_dtype);
143
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
143
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
144
144
 
145
145
  handle = cumo_cuda_cudnn_handle();
146
146
 
@@ -160,7 +160,7 @@ static VALUE
160
160
  int_pad,
161
161
  ndim,
162
162
  cudnn_dtype);
163
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
163
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
164
164
  algo = perf_result.algo;
165
165
  workspace_size = perf_result.memory;
166
166
 
@@ -179,7 +179,7 @@ static VALUE
179
179
  (void*)&beta,
180
180
  y_desc,
181
181
  (void*)y_ptr);
182
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
182
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
183
183
 
184
184
  if (b != Qnil) {
185
185
  size_t new_shape[CUMO_NA_MAX_DIMENSION];
@@ -208,7 +208,7 @@ static VALUE
208
208
  // restore b.shape
209
209
  nb_cont->ndim = b_ndim;
210
210
  nb_cont->shape = b_shape;
211
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
211
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
212
212
 
213
213
  status = cudnnAddTensor(
214
214
  handle,
@@ -218,10 +218,10 @@ static VALUE
218
218
  (void*)&alpha,
219
219
  y_desc,
220
220
  (void*)y_ptr);
221
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
221
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
222
222
  }
223
223
 
224
- CONV_ERROR:
224
+ CONV_TRANSPOSE_ERROR:
225
225
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
226
226
  if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
227
227
  if (b_desc) cudnnDestroyTensorDescriptor(b_desc);
@@ -94,11 +94,11 @@ static VALUE
94
94
  gx_ptr = cumo_na_get_offset_pointer_for_write(gx);
95
95
 
96
96
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
97
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
97
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
98
98
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
99
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
99
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
100
100
  status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
101
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
101
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
102
102
 
103
103
  handle = cumo_cuda_cudnn_handle();
104
104
  status = cudnnPoolingBackward(
@@ -114,9 +114,9 @@ static VALUE
114
114
  (void*)&beta,
115
115
  x_desc,
116
116
  (void*)gx_ptr);
117
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
117
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
118
118
 
119
- POOLING_ERROR:
119
+ POOLING_BACKAWARD_ERROR:
120
120
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
121
121
  if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
122
122
  if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
@@ -98,11 +98,11 @@ static VALUE
98
98
  y_ptr = cumo_na_get_offset_pointer_for_write(y);
99
99
 
100
100
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
101
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
101
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
102
102
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
103
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
103
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
104
104
  status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
105
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
105
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
106
106
 
107
107
  handle = cumo_cuda_cudnn_handle();
108
108
  status = cudnnPoolingForward(
@@ -114,9 +114,9 @@ static VALUE
114
114
  (void*)&beta,
115
115
  y_desc,
116
116
  (void*)y_ptr);
117
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
117
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
118
118
 
119
- POOLING_ERROR:
119
+ POLLING_FORWARD_ERROR:
120
120
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
121
121
  if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
122
122
  if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
@@ -1133,6 +1133,9 @@ module Cumo
1133
1133
 
1134
1134
  def dot(b)
1135
1135
  t = self.class::UPCAST[b.class]
1136
+ if self.ndim == 0 and b.ndim == 0
1137
+ return self * b
1138
+ end
1136
1139
  if [SFloat, DFloat, SComplex, DComplex].include?(t)
1137
1140
  b = self.class.asarray(b)
1138
1141
  case self.ndim
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cumo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-10 00:00:00.000000000 Z
11
+ date: 2019-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray