cumo 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e2ed7641113820552c74682336a8a7a92cce11e8897a4b6e0d9f9d416431242d
4
- data.tar.gz: b5e6a459c3bf096991587a40c7b1efeba3aa1b2004e37604d3adf5fc7dce68ee
3
+ metadata.gz: 8f07518545c0e4d72380382915462039151c132ee9827967e9d4c32604001f15
4
+ data.tar.gz: 94e5c261659e401d9a8016eb0a3493d9bce26abe19347602859f7a9fcc79014e
5
5
  SHA512:
6
- metadata.gz: 86cdf6a12b38ebb87c672cc0cb87ed7a51a2b37fc5fdf0b21d386ac4446afa102952da69e8799130abef8420b2bf6cd09836d6d14832b8ca2d4693f0718275e8
7
- data.tar.gz: 0f881077419ee9ca3c4b5b09fcf01e35be9884cca9d736745bdd45c5f8c3fab8b856a0c336f486ac12f26209d81f2128b8eefbcdbd3b240259ca21479dd13f41
6
+ metadata.gz: cf48d1e5452e2c871587da1f006b90f3de54c8dc911d4fa555bd54c7ea69741903ce000fd0b1fd38ba96fc46d65e79f24e3c9d858029df176cee7768e8de790b
7
+ data.tar.gz: 4c5d6bdc9ec211967458b9b4f3576a278b2d59011faf2ae5dd85979a4821496ad29db9eefd0fce9d0765f9ede38d42af2c8fc8dd250de41f9e22759ee9914a56
@@ -1,3 +1,11 @@
1
+ # 0.3.1 (2019-04-16)
2
+
3
+ Fixes:
4
+
5
+ * Fix batch\_norm\_backward
6
+ * Fix scalar.dot(scalar)
7
+ * Fix clip
8
+
1
9
  # 0.3.0 (2019-04-10)
2
10
 
3
11
  Enhancements:
@@ -10,8 +10,8 @@ extern "C" {
10
10
  #endif
11
11
  #endif
12
12
 
13
- #define CUMO_VERSION "0.3.0"
14
- #define CUMO_VERSION_CODE 30
13
+ #define CUMO_VERSION "0.3.1"
14
+ #define CUMO_VERSION_CODE 31
15
15
 
16
16
  bool cumo_compatible_mode_enabled_p();
17
17
  bool cumo_show_warning_enabled_p();
@@ -5,6 +5,7 @@
5
5
  #ifdef CUDNN_FOUND
6
6
  #include <cudnn.h>
7
7
  #endif // CUDNN_FOUND
8
+ #include "cumo/narray.h"
8
9
 
9
10
  #if defined(__cplusplus)
10
11
  extern "C" {
@@ -81,6 +82,24 @@ cumo_cuda_cudnn_get_int_ary(int* int_ary, VALUE ary, size_t ndim, int default_va
81
82
  }
82
83
  }
83
84
 
85
+ // VALUE is Ruby Array
86
+ static inline size_t
87
+ cumo_cuda_cudnn_get_int_axis(int* int_axis, VALUE axis)
88
+ {
89
+ size_t axis_ndim;
90
+ Check_Type(axis, T_ARRAY);
91
+ axis_ndim = (size_t)(RARRAY_LEN(axis));
92
+ if (CUMO_NA_MAX_DIMENSION <= axis_ndim) {
93
+ rb_raise(rb_eArgError, "Size of axis must be smaller than %d, but was %d",
94
+ (int)CUMO_NA_MAX_DIMENSION, (int)axis_ndim);
95
+ }
96
+ for (size_t idim = 0; idim < axis_ndim; ++idim) {
97
+ int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
98
+ }
99
+ // TODO: check axis is sorted
100
+ return axis_ndim;
101
+ }
102
+
84
103
  size_t
85
104
  cumo_cuda_cudnn_GetConvOutDim(
86
105
  size_t in_dim,
@@ -36,6 +36,10 @@ void <%="cumo_#{type_name}_mulsum#{nan}_reduce_kernel_launch"%>(char *p1, char *
36
36
  {
37
37
  ssize_t s1_idx = s1 / sizeof(dtype);
38
38
  ssize_t s2_idx = s2 / sizeof(dtype);
39
+ if (n == 1) { // when n == 1, s1 and s3 could be 0
40
+ s1_idx = 1;
41
+ s2_idx = 1;
42
+ }
39
43
  thrust::device_ptr<dtype> p1_begin = thrust::device_pointer_cast((dtype*)p1);
40
44
  thrust::device_ptr<dtype> p1_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
41
45
  thrust::device_ptr<dtype> p2_begin = thrust::device_pointer_cast((dtype*)p2);
@@ -88,12 +88,7 @@ static VALUE
88
88
  double_decay = NUM2DBL(decay);
89
89
  }
90
90
  if (axis != Qnil) {
91
- Check_Type(axis, T_ARRAY);
92
- axis_ndim = (size_t)(RARRAY_LEN(axis));
93
- for (size_t idim = 0; idim < axis_ndim; ++idim) {
94
- int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
95
- }
96
- // TODO: check axis is sorted
91
+ axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
97
92
  }
98
93
 
99
94
  CumoGetNArray(x, nx);
@@ -55,7 +55,7 @@ static VALUE
55
55
  size_t axis_ndim = 1;
56
56
 
57
57
  rb_scan_args(argc, argv, "2:", &gamma, &gy, &kw_hash);
58
- rb_get_kwargs(kw_hash, kw_table, 0, 8, opts);
58
+ rb_get_kwargs(kw_hash, kw_table, 0, 7, opts);
59
59
  mean = cumo_cuda_cudnn_option_value(opts[0], Qnil);
60
60
  inv_std = cumo_cuda_cudnn_option_value(opts[1], Qnil);
61
61
  eps = cumo_cuda_cudnn_option_value(opts[2], Qnil);
@@ -74,12 +74,7 @@ static VALUE
74
74
  double_eps = NUM2DBL(eps);
75
75
  }
76
76
  if (axis != Qnil) {
77
- Check_Type(axis, T_ARRAY);
78
- axis_ndim = (size_t)(RARRAY_LEN(axis));
79
- for (size_t idim = 0; idim < axis_ndim; ++idim) {
80
- int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
81
- }
82
- // TODO: check axis is sorted
77
+ axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
83
78
  }
84
79
 
85
80
  CumoGetNArray(x, nx);
@@ -135,11 +130,11 @@ static VALUE
135
130
  gbeta_ptr = cumo_na_get_offset_pointer_for_write(gbeta);
136
131
 
137
132
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
138
- if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
133
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
139
134
 
140
135
  mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
141
136
  status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
142
- if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
137
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
143
138
  // TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
144
139
 
145
140
  handle = cumo_cuda_cudnn_handle();
@@ -164,9 +159,9 @@ static VALUE
164
159
  double_eps,
165
160
  mean_ptr,
166
161
  inv_std_ptr);
167
- if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_ERROR;
162
+ if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
168
163
 
169
- BATCH_NORM_ERROR:
164
+ BATCH_NORM_BACKWARD_ERROR:
170
165
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
171
166
  if (bn_desc) cudnnDestroyTensorDescriptor(bn_desc);
172
167
  cumo_cuda_cudnn_check_status(status);
@@ -11,6 +11,7 @@ static void
11
11
  CUMO_INIT_PTR(lp, 2, p3, s3);
12
12
  CUMO_INIT_PTR(lp, 3, p4, s4);
13
13
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
14
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
14
15
  for (; i--;) {
15
16
  CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
16
17
  CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
@@ -34,6 +35,7 @@ static void
34
35
  CUMO_INIT_PTR(lp, 1, p2, s2);
35
36
  CUMO_INIT_PTR(lp, 2, p3, s3);
36
37
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_min", "<%=type_name%>");
38
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
37
39
  for (; i--;) {
38
40
  CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
39
41
  CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
@@ -54,6 +56,7 @@ static void
54
56
  CUMO_INIT_PTR(lp, 1, p2, s2);
55
57
  CUMO_INIT_PTR(lp, 2, p3, s3);
56
58
  CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_max", "<%=type_name%>");
59
+ cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
57
60
  for (; i--;) {
58
61
  CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
59
62
  CUMO_GET_DATA_STRIDE(p2,s2,dtype,max);
@@ -134,13 +134,13 @@ static VALUE
134
134
  y_ptr = cumo_na_get_offset_pointer_for_write(y);
135
135
 
136
136
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
137
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
137
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
138
138
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
139
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
139
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
140
140
  status = cumo_cuda_cudnn_CreateFilterDescriptor(&w_desc, w_cont, cudnn_dtype);
141
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
141
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
142
142
  status = cumo_cuda_cudnn_CreateConvolutionDescriptor(&conv_desc, ndim, int_stride, int_pad, cudnn_dtype);
143
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
143
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
144
144
 
145
145
  handle = cumo_cuda_cudnn_handle();
146
146
 
@@ -160,7 +160,7 @@ static VALUE
160
160
  int_pad,
161
161
  ndim,
162
162
  cudnn_dtype);
163
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
163
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
164
164
  algo = perf_result.algo;
165
165
  workspace_size = perf_result.memory;
166
166
 
@@ -179,7 +179,7 @@ static VALUE
179
179
  (void*)&beta,
180
180
  y_desc,
181
181
  (void*)y_ptr);
182
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
182
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
183
183
 
184
184
  if (b != Qnil) {
185
185
  size_t new_shape[CUMO_NA_MAX_DIMENSION];
@@ -208,7 +208,7 @@ static VALUE
208
208
  // restore b.shape
209
209
  nb_cont->ndim = b_ndim;
210
210
  nb_cont->shape = b_shape;
211
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
211
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
212
212
 
213
213
  status = cudnnAddTensor(
214
214
  handle,
@@ -218,10 +218,10 @@ static VALUE
218
218
  (void*)&alpha,
219
219
  y_desc,
220
220
  (void*)y_ptr);
221
- if (status != CUDNN_STATUS_SUCCESS) goto CONV_ERROR;
221
+ if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
222
222
  }
223
223
 
224
- CONV_ERROR:
224
+ CONV_TRANSPOSE_ERROR:
225
225
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
226
226
  if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
227
227
  if (b_desc) cudnnDestroyTensorDescriptor(b_desc);
@@ -94,11 +94,11 @@ static VALUE
94
94
  gx_ptr = cumo_na_get_offset_pointer_for_write(gx);
95
95
 
96
96
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
97
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
97
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
98
98
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
99
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
99
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
100
100
  status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
101
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
101
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
102
102
 
103
103
  handle = cumo_cuda_cudnn_handle();
104
104
  status = cudnnPoolingBackward(
@@ -114,9 +114,9 @@ static VALUE
114
114
  (void*)&beta,
115
115
  x_desc,
116
116
  (void*)gx_ptr);
117
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
117
+ if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
118
118
 
119
- POOLING_ERROR:
119
+ POOLING_BACKAWARD_ERROR:
120
120
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
121
121
  if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
122
122
  if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
@@ -98,11 +98,11 @@ static VALUE
98
98
  y_ptr = cumo_na_get_offset_pointer_for_write(y);
99
99
 
100
100
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
101
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
101
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
102
102
  status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
103
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
103
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
104
104
  status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
105
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
105
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
106
106
 
107
107
  handle = cumo_cuda_cudnn_handle();
108
108
  status = cudnnPoolingForward(
@@ -114,9 +114,9 @@ static VALUE
114
114
  (void*)&beta,
115
115
  y_desc,
116
116
  (void*)y_ptr);
117
- if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR;
117
+ if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
118
118
 
119
- POOLING_ERROR:
119
+ POLLING_FORWARD_ERROR:
120
120
  if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
121
121
  if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
122
122
  if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
@@ -1133,6 +1133,9 @@ module Cumo
1133
1133
 
1134
1134
  def dot(b)
1135
1135
  t = self.class::UPCAST[b.class]
1136
+ if self.ndim == 0 and b.ndim == 0
1137
+ return self * b
1138
+ end
1136
1139
  if [SFloat, DFloat, SComplex, DComplex].include?(t)
1137
1140
  b = self.class.asarray(b)
1138
1141
  case self.ndim
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cumo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-10 00:00:00.000000000 Z
11
+ date: 2019-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray