cumo 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cudnn.h +19 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +4 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -6
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -11
- data/ext/cumo/narray/gen/tmpl/clip.c +3 -0
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +9 -9
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +5 -5
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +5 -5
- data/lib/cumo/narray/extra.rb +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f07518545c0e4d72380382915462039151c132ee9827967e9d4c32604001f15
|
4
|
+
data.tar.gz: 94e5c261659e401d9a8016eb0a3493d9bce26abe19347602859f7a9fcc79014e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf48d1e5452e2c871587da1f006b90f3de54c8dc911d4fa555bd54c7ea69741903ce000fd0b1fd38ba96fc46d65e79f24e3c9d858029df176cee7768e8de790b
|
7
|
+
data.tar.gz: 4c5d6bdc9ec211967458b9b4f3576a278b2d59011faf2ae5dd85979a4821496ad29db9eefd0fce9d0765f9ede38d42af2c8fc8dd250de41f9e22759ee9914a56
|
data/CHANGELOG.md
CHANGED
data/ext/cumo/include/cumo.h
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#ifdef CUDNN_FOUND
|
6
6
|
#include <cudnn.h>
|
7
7
|
#endif // CUDNN_FOUND
|
8
|
+
#include "cumo/narray.h"
|
8
9
|
|
9
10
|
#if defined(__cplusplus)
|
10
11
|
extern "C" {
|
@@ -81,6 +82,24 @@ cumo_cuda_cudnn_get_int_ary(int* int_ary, VALUE ary, size_t ndim, int default_va
|
|
81
82
|
}
|
82
83
|
}
|
83
84
|
|
85
|
+
// VALUE is Ruby Array
|
86
|
+
static inline size_t
|
87
|
+
cumo_cuda_cudnn_get_int_axis(int* int_axis, VALUE axis)
|
88
|
+
{
|
89
|
+
size_t axis_ndim;
|
90
|
+
Check_Type(axis, T_ARRAY);
|
91
|
+
axis_ndim = (size_t)(RARRAY_LEN(axis));
|
92
|
+
if (CUMO_NA_MAX_DIMENSION <= axis_ndim) {
|
93
|
+
rb_raise(rb_eArgError, "Size of axis must be smaller than %d, but was %d",
|
94
|
+
(int)CUMO_NA_MAX_DIMENSION, (int)axis_ndim);
|
95
|
+
}
|
96
|
+
for (size_t idim = 0; idim < axis_ndim; ++idim) {
|
97
|
+
int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
|
98
|
+
}
|
99
|
+
// TODO: check axis is sorted
|
100
|
+
return axis_ndim;
|
101
|
+
}
|
102
|
+
|
84
103
|
size_t
|
85
104
|
cumo_cuda_cudnn_GetConvOutDim(
|
86
105
|
size_t in_dim,
|
@@ -36,6 +36,10 @@ void <%="cumo_#{type_name}_mulsum#{nan}_reduce_kernel_launch"%>(char *p1, char *
|
|
36
36
|
{
|
37
37
|
ssize_t s1_idx = s1 / sizeof(dtype);
|
38
38
|
ssize_t s2_idx = s2 / sizeof(dtype);
|
39
|
+
if (n == 1) { // when n == 1, s1 and s3 could be 0
|
40
|
+
s1_idx = 1;
|
41
|
+
s2_idx = 1;
|
42
|
+
}
|
39
43
|
thrust::device_ptr<dtype> p1_begin = thrust::device_pointer_cast((dtype*)p1);
|
40
44
|
thrust::device_ptr<dtype> p1_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
|
41
45
|
thrust::device_ptr<dtype> p2_begin = thrust::device_pointer_cast((dtype*)p2);
|
@@ -88,12 +88,7 @@ static VALUE
|
|
88
88
|
double_decay = NUM2DBL(decay);
|
89
89
|
}
|
90
90
|
if (axis != Qnil) {
|
91
|
-
|
92
|
-
axis_ndim = (size_t)(RARRAY_LEN(axis));
|
93
|
-
for (size_t idim = 0; idim < axis_ndim; ++idim) {
|
94
|
-
int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
|
95
|
-
}
|
96
|
-
// TODO: check axis is sorted
|
91
|
+
axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
|
97
92
|
}
|
98
93
|
|
99
94
|
CumoGetNArray(x, nx);
|
@@ -55,7 +55,7 @@ static VALUE
|
|
55
55
|
size_t axis_ndim = 1;
|
56
56
|
|
57
57
|
rb_scan_args(argc, argv, "2:", &gamma, &gy, &kw_hash);
|
58
|
-
rb_get_kwargs(kw_hash, kw_table, 0,
|
58
|
+
rb_get_kwargs(kw_hash, kw_table, 0, 7, opts);
|
59
59
|
mean = cumo_cuda_cudnn_option_value(opts[0], Qnil);
|
60
60
|
inv_std = cumo_cuda_cudnn_option_value(opts[1], Qnil);
|
61
61
|
eps = cumo_cuda_cudnn_option_value(opts[2], Qnil);
|
@@ -74,12 +74,7 @@ static VALUE
|
|
74
74
|
double_eps = NUM2DBL(eps);
|
75
75
|
}
|
76
76
|
if (axis != Qnil) {
|
77
|
-
|
78
|
-
axis_ndim = (size_t)(RARRAY_LEN(axis));
|
79
|
-
for (size_t idim = 0; idim < axis_ndim; ++idim) {
|
80
|
-
int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
|
81
|
-
}
|
82
|
-
// TODO: check axis is sorted
|
77
|
+
axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
|
83
78
|
}
|
84
79
|
|
85
80
|
CumoGetNArray(x, nx);
|
@@ -135,11 +130,11 @@ static VALUE
|
|
135
130
|
gbeta_ptr = cumo_na_get_offset_pointer_for_write(gbeta);
|
136
131
|
|
137
132
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
138
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
133
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
139
134
|
|
140
135
|
mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
|
141
136
|
status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
|
142
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
137
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
143
138
|
// TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
|
144
139
|
|
145
140
|
handle = cumo_cuda_cudnn_handle();
|
@@ -164,9 +159,9 @@ static VALUE
|
|
164
159
|
double_eps,
|
165
160
|
mean_ptr,
|
166
161
|
inv_std_ptr);
|
167
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
162
|
+
if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
|
168
163
|
|
169
|
-
|
164
|
+
BATCH_NORM_BACKWARD_ERROR:
|
170
165
|
if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
|
171
166
|
if (bn_desc) cudnnDestroyTensorDescriptor(bn_desc);
|
172
167
|
cumo_cuda_cudnn_check_status(status);
|
@@ -11,6 +11,7 @@ static void
|
|
11
11
|
CUMO_INIT_PTR(lp, 2, p3, s3);
|
12
12
|
CUMO_INIT_PTR(lp, 3, p4, s4);
|
13
13
|
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
|
14
|
+
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
14
15
|
for (; i--;) {
|
15
16
|
CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
|
16
17
|
CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
|
@@ -34,6 +35,7 @@ static void
|
|
34
35
|
CUMO_INIT_PTR(lp, 1, p2, s2);
|
35
36
|
CUMO_INIT_PTR(lp, 2, p3, s3);
|
36
37
|
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_min", "<%=type_name%>");
|
38
|
+
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
37
39
|
for (; i--;) {
|
38
40
|
CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
|
39
41
|
CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
|
@@ -54,6 +56,7 @@ static void
|
|
54
56
|
CUMO_INIT_PTR(lp, 1, p2, s2);
|
55
57
|
CUMO_INIT_PTR(lp, 2, p3, s3);
|
56
58
|
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_max", "<%=type_name%>");
|
59
|
+
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
|
57
60
|
for (; i--;) {
|
58
61
|
CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
|
59
62
|
CUMO_GET_DATA_STRIDE(p2,s2,dtype,max);
|
@@ -134,13 +134,13 @@ static VALUE
|
|
134
134
|
y_ptr = cumo_na_get_offset_pointer_for_write(y);
|
135
135
|
|
136
136
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
137
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
137
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
138
138
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
|
139
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
139
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
140
140
|
status = cumo_cuda_cudnn_CreateFilterDescriptor(&w_desc, w_cont, cudnn_dtype);
|
141
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
141
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
142
142
|
status = cumo_cuda_cudnn_CreateConvolutionDescriptor(&conv_desc, ndim, int_stride, int_pad, cudnn_dtype);
|
143
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
143
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
144
144
|
|
145
145
|
handle = cumo_cuda_cudnn_handle();
|
146
146
|
|
@@ -160,7 +160,7 @@ static VALUE
|
|
160
160
|
int_pad,
|
161
161
|
ndim,
|
162
162
|
cudnn_dtype);
|
163
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
163
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
164
164
|
algo = perf_result.algo;
|
165
165
|
workspace_size = perf_result.memory;
|
166
166
|
|
@@ -179,7 +179,7 @@ static VALUE
|
|
179
179
|
(void*)&beta,
|
180
180
|
y_desc,
|
181
181
|
(void*)y_ptr);
|
182
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
182
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
183
183
|
|
184
184
|
if (b != Qnil) {
|
185
185
|
size_t new_shape[CUMO_NA_MAX_DIMENSION];
|
@@ -208,7 +208,7 @@ static VALUE
|
|
208
208
|
// restore b.shape
|
209
209
|
nb_cont->ndim = b_ndim;
|
210
210
|
nb_cont->shape = b_shape;
|
211
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
211
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
212
212
|
|
213
213
|
status = cudnnAddTensor(
|
214
214
|
handle,
|
@@ -218,10 +218,10 @@ static VALUE
|
|
218
218
|
(void*)&alpha,
|
219
219
|
y_desc,
|
220
220
|
(void*)y_ptr);
|
221
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
221
|
+
if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
|
222
222
|
}
|
223
223
|
|
224
|
-
|
224
|
+
CONV_TRANSPOSE_ERROR:
|
225
225
|
if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
|
226
226
|
if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
|
227
227
|
if (b_desc) cudnnDestroyTensorDescriptor(b_desc);
|
@@ -94,11 +94,11 @@ static VALUE
|
|
94
94
|
gx_ptr = cumo_na_get_offset_pointer_for_write(gx);
|
95
95
|
|
96
96
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
97
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
97
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
98
98
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
|
99
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
99
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
100
100
|
status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
|
101
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
101
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
102
102
|
|
103
103
|
handle = cumo_cuda_cudnn_handle();
|
104
104
|
status = cudnnPoolingBackward(
|
@@ -114,9 +114,9 @@ static VALUE
|
|
114
114
|
(void*)&beta,
|
115
115
|
x_desc,
|
116
116
|
(void*)gx_ptr);
|
117
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
117
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
118
118
|
|
119
|
-
|
119
|
+
POOLING_BACKAWARD_ERROR:
|
120
120
|
if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
|
121
121
|
if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
|
122
122
|
if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
|
@@ -98,11 +98,11 @@ static VALUE
|
|
98
98
|
y_ptr = cumo_na_get_offset_pointer_for_write(y);
|
99
99
|
|
100
100
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
101
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
101
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
102
102
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
|
103
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
103
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
104
104
|
status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
|
105
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
105
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
106
106
|
|
107
107
|
handle = cumo_cuda_cudnn_handle();
|
108
108
|
status = cudnnPoolingForward(
|
@@ -114,9 +114,9 @@ static VALUE
|
|
114
114
|
(void*)&beta,
|
115
115
|
y_desc,
|
116
116
|
(void*)y_ptr);
|
117
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
117
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
118
118
|
|
119
|
-
|
119
|
+
POLLING_FORWARD_ERROR:
|
120
120
|
if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
|
121
121
|
if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
|
122
122
|
if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
|
data/lib/cumo/narray/extra.rb
CHANGED
@@ -1133,6 +1133,9 @@ module Cumo
|
|
1133
1133
|
|
1134
1134
|
def dot(b)
|
1135
1135
|
t = self.class::UPCAST[b.class]
|
1136
|
+
if self.ndim == 0 and b.ndim == 0
|
1137
|
+
return self * b
|
1138
|
+
end
|
1136
1139
|
if [SFloat, DFloat, SComplex, DComplex].include?(t)
|
1137
1140
|
b = self.class.asarray(b)
|
1138
1141
|
case self.ndim
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cumo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-04-
|
11
|
+
date: 2019-04-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|