cumo 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/cumo/include/cumo.h +2 -2
- data/ext/cumo/include/cumo/cuda/cudnn.h +19 -0
- data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu +4 -0
- data/ext/cumo/narray/gen/tmpl/batch_norm.c +1 -6
- data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c +6 -11
- data/ext/cumo/narray/gen/tmpl/clip.c +3 -0
- data/ext/cumo/narray/gen/tmpl/conv_transpose.c +9 -9
- data/ext/cumo/narray/gen/tmpl/pooling_backward.c +5 -5
- data/ext/cumo/narray/gen/tmpl/pooling_forward.c +5 -5
- data/lib/cumo/narray/extra.rb +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8f07518545c0e4d72380382915462039151c132ee9827967e9d4c32604001f15
+  data.tar.gz: 94e5c261659e401d9a8016eb0a3493d9bce26abe19347602859f7a9fcc79014e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cf48d1e5452e2c871587da1f006b90f3de54c8dc911d4fa555bd54c7ea69741903ce000fd0b1fd38ba96fc46d65e79f24e3c9d858029df176cee7768e8de790b
+  data.tar.gz: 4c5d6bdc9ec211967458b9b4f3576a278b2d59011faf2ae5dd85979a4821496ad29db9eefd0fce9d0765f9ede38d42af2c8fc8dd250de41f9e22759ee9914a56
data/CHANGELOG.md
CHANGED
data/ext/cumo/include/cumo.h
CHANGED
@@ -5,6 +5,7 @@
 #ifdef CUDNN_FOUND
 #include <cudnn.h>
 #endif // CUDNN_FOUND
+#include "cumo/narray.h"
 
 #if defined(__cplusplus)
 extern "C" {

data/ext/cumo/include/cumo/cuda/cudnn.h
CHANGED
@@ -81,6 +82,24 @@ cumo_cuda_cudnn_get_int_ary(int* int_ary, VALUE ary, size_t ndim, int default_va
     }
 }
 
+// VALUE is Ruby Array
+static inline size_t
+cumo_cuda_cudnn_get_int_axis(int* int_axis, VALUE axis)
+{
+    size_t axis_ndim;
+    Check_Type(axis, T_ARRAY);
+    axis_ndim = (size_t)(RARRAY_LEN(axis));
+    if (CUMO_NA_MAX_DIMENSION <= axis_ndim) {
+        rb_raise(rb_eArgError, "Size of axis must be smaller than %d, but was %d",
+                (int)CUMO_NA_MAX_DIMENSION, (int)axis_ndim);
+    }
+    for (size_t idim = 0; idim < axis_ndim; ++idim) {
+        int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
+    }
+    // TODO: check axis is sorted
+    return axis_ndim;
+}
+
 size_t
 cumo_cuda_cudnn_GetConvOutDim(
     size_t in_dim,

data/ext/cumo/narray/gen/tmpl/accum_binary_kernel.cu
CHANGED
@@ -36,6 +36,10 @@ void <%="cumo_#{type_name}_mulsum#{nan}_reduce_kernel_launch"%>(char *p1, char *
 {
     ssize_t s1_idx = s1 / sizeof(dtype);
     ssize_t s2_idx = s2 / sizeof(dtype);
+    if (n == 1) { // when n == 1, s1 and s3 could be 0
+        s1_idx = 1;
+        s2_idx = 1;
+    }
     thrust::device_ptr<dtype> p1_begin = thrust::device_pointer_cast((dtype*)p1);
     thrust::device_ptr<dtype> p1_end = thrust::device_pointer_cast(((dtype*)p1) + n * s1_idx);
     thrust::device_ptr<dtype> p2_begin = thrust::device_pointer_cast((dtype*)p2);
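
The guarded template above generates the GPU mulsum reduction. A minimal usage sketch of the single-element case it protects, assuming the cumo gem and a CUDA-capable device (values are illustrative):

require "cumo/narray"

a = Cumo::DFloat[3.0]
b = Cumo::DFloat[4.0]
# With only one element the strides passed to the kernel can be 0;
# the added guard keeps the thrust reduction range non-empty.
p a.mulsum(b)   # expected 12.0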

data/ext/cumo/narray/gen/tmpl/batch_norm.c
CHANGED
@@ -88,12 +88,7 @@ static VALUE
         double_decay = NUM2DBL(decay);
     }
     if (axis != Qnil) {
-
-        axis_ndim = (size_t)(RARRAY_LEN(axis));
-        for (size_t idim = 0; idim < axis_ndim; ++idim) {
-            int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
-        }
-        // TODO: check axis is sorted
+        axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
     }
 
     CumoGetNArray(x, nx);

data/ext/cumo/narray/gen/tmpl/batch_norm_backward.c
CHANGED
@@ -55,7 +55,7 @@ static VALUE
     size_t axis_ndim = 1;
 
     rb_scan_args(argc, argv, "2:", &gamma, &gy, &kw_hash);
-    rb_get_kwargs(kw_hash, kw_table, 0,
+    rb_get_kwargs(kw_hash, kw_table, 0, 7, opts);
     mean = cumo_cuda_cudnn_option_value(opts[0], Qnil);
     inv_std = cumo_cuda_cudnn_option_value(opts[1], Qnil);
     eps = cumo_cuda_cudnn_option_value(opts[2], Qnil);
@@ -74,12 +74,7 @@ static VALUE
         double_eps = NUM2DBL(eps);
     }
     if (axis != Qnil) {
-
-        axis_ndim = (size_t)(RARRAY_LEN(axis));
-        for (size_t idim = 0; idim < axis_ndim; ++idim) {
-            int_axis[idim] = NUM2INT(rb_ary_entry(axis, (long)idim));
-        }
-        // TODO: check axis is sorted
+        axis_ndim = cumo_cuda_cudnn_get_int_axis(int_axis, axis);
     }
 
     CumoGetNArray(x, nx);
@@ -135,11 +130,11 @@ static VALUE
     gbeta_ptr = cumo_na_get_offset_pointer_for_write(gbeta);
 
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
 
     mode = cumo_cuda_cudnn_GetBatchNormMode(axis_ndim, int_axis);
     status = cumo_cuda_cudnn_CreateBNTensorDescriptor(&bn_desc, x_desc, mode);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
     // TODO: bn_desc may return another type, and may need to cast gamma, gy, mean, var
 
     handle = cumo_cuda_cudnn_handle();
@@ -164,9 +159,9 @@ static VALUE
         double_eps,
         mean_ptr,
         inv_std_ptr);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto BATCH_NORM_BACKWARD_ERROR;
 
-
+BATCH_NORM_BACKWARD_ERROR:
     if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
     if (bn_desc) cudnnDestroyTensorDescriptor(bn_desc);
     cumo_cuda_cudnn_check_status(status);

data/ext/cumo/narray/gen/tmpl/clip.c
CHANGED
@@ -11,6 +11,7 @@ static void
     CUMO_INIT_PTR(lp, 2, p3, s3);
     CUMO_INIT_PTR(lp, 3, p4, s4);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     for (; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
         CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
@@ -34,6 +35,7 @@ static void
     CUMO_INIT_PTR(lp, 1, p2, s2);
     CUMO_INIT_PTR(lp, 2, p3, s3);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_min", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     for (; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
         CUMO_GET_DATA_STRIDE(p2,s2,dtype,min);
@@ -54,6 +56,7 @@ static void
     CUMO_INIT_PTR(lp, 1, p2, s2);
     CUMO_INIT_PTR(lp, 2, p3, s3);
     CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>_max", "<%=type_name%>");
+    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
     for (; i--;) {
         CUMO_GET_DATA_STRIDE(p1,s1,dtype,x);
         CUMO_GET_DATA_STRIDE(p2,s2,dtype,max);
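
The three templates above run the clip variants in an element-wise loop on the host, and the added cudaDeviceSynchronize call ensures pending GPU work has finished before that loop reads the buffers. A small usage sketch, assuming the cumo gem and a CUDA device (values are illustrative):

require "cumo/narray"

a = Cumo::DFloat[-2.0, 0.5, 3.0, 9.0]
# Limit values to the range 0..4.
p a.clip(0, 4)   # expected [0, 0.5, 3, 4]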

data/ext/cumo/narray/gen/tmpl/conv_transpose.c
CHANGED
@@ -134,13 +134,13 @@ static VALUE
     y_ptr = cumo_na_get_offset_pointer_for_write(y);
 
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
     status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
     status = cumo_cuda_cudnn_CreateFilterDescriptor(&w_desc, w_cont, cudnn_dtype);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
     status = cumo_cuda_cudnn_CreateConvolutionDescriptor(&conv_desc, ndim, int_stride, int_pad, cudnn_dtype);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
 
     handle = cumo_cuda_cudnn_handle();
 
@@ -160,7 +160,7 @@ static VALUE
         int_pad,
         ndim,
         cudnn_dtype);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
     algo = perf_result.algo;
     workspace_size = perf_result.memory;
 
@@ -179,7 +179,7 @@ static VALUE
         (void*)&beta,
         y_desc,
         (void*)y_ptr);
-    if (status != CUDNN_STATUS_SUCCESS) goto
+    if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
 
     if (b != Qnil) {
         size_t new_shape[CUMO_NA_MAX_DIMENSION];
@@ -208,7 +208,7 @@ static VALUE
         // restore b.shape
         nb_cont->ndim = b_ndim;
         nb_cont->shape = b_shape;
-        if (status != CUDNN_STATUS_SUCCESS) goto
+        if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
 
         status = cudnnAddTensor(
             handle,
@@ -218,10 +218,10 @@ static VALUE
             (void*)&alpha,
             y_desc,
            (void*)y_ptr);
-        if (status != CUDNN_STATUS_SUCCESS) goto
+        if (status != CUDNN_STATUS_SUCCESS) goto CONV_TRANSPOSE_ERROR;
     }
 
-
+CONV_TRANSPOSE_ERROR:
     if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
     if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
     if (b_desc) cudnnDestroyTensorDescriptor(b_desc);
|
@@ -94,11 +94,11 @@ static VALUE
|
|
94
94
|
gx_ptr = cumo_na_get_offset_pointer_for_write(gx);
|
95
95
|
|
96
96
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
97
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
97
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
98
98
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
|
99
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
99
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
100
100
|
status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
|
101
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
101
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
102
102
|
|
103
103
|
handle = cumo_cuda_cudnn_handle();
|
104
104
|
status = cudnnPoolingBackward(
|
@@ -114,9 +114,9 @@ static VALUE
|
|
114
114
|
(void*)&beta,
|
115
115
|
x_desc,
|
116
116
|
(void*)gx_ptr);
|
117
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
117
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POOLING_BACKAWARD_ERROR;
|
118
118
|
|
119
|
-
|
119
|
+
POOLING_BACKAWARD_ERROR:
|
120
120
|
if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
|
121
121
|
if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
|
122
122
|
if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
|
@@ -98,11 +98,11 @@ static VALUE
|
|
98
98
|
y_ptr = cumo_na_get_offset_pointer_for_write(y);
|
99
99
|
|
100
100
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype);
|
101
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
101
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
102
102
|
status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype);
|
103
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
103
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
104
104
|
status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad);
|
105
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
105
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
106
106
|
|
107
107
|
handle = cumo_cuda_cudnn_handle();
|
108
108
|
status = cudnnPoolingForward(
|
@@ -114,9 +114,9 @@ static VALUE
|
|
114
114
|
(void*)&beta,
|
115
115
|
y_desc,
|
116
116
|
(void*)y_ptr);
|
117
|
-
if (status != CUDNN_STATUS_SUCCESS) goto
|
117
|
+
if (status != CUDNN_STATUS_SUCCESS) goto POLLING_FORWARD_ERROR;
|
118
118
|
|
119
|
-
|
119
|
+
POLLING_FORWARD_ERROR:
|
120
120
|
if (x_desc) cudnnDestroyTensorDescriptor(x_desc);
|
121
121
|
if (y_desc) cudnnDestroyTensorDescriptor(y_desc);
|
122
122
|
if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc);
|
data/lib/cumo/narray/extra.rb
CHANGED
@@ -1133,6 +1133,9 @@ module Cumo
 
     def dot(b)
       t = self.class::UPCAST[b.class]
+      if self.ndim == 0 and b.ndim == 0
+        return self * b
+      end
       if [SFloat, DFloat, SComplex, DComplex].include?(t)
         b = self.class.asarray(b)
         case self.ndim
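
The new branch makes dot between two 0-dimensional narrays fall back to element-wise multiplication rather than the vector/matrix dispatch below it. A small sketch, assuming the cumo gem (the .new.fill construction of a 0-dimensional array follows the usual Numo/Cumo idiom):

require "cumo/narray"

a = Cumo::DFloat.new.fill(3.0)   # 0-dimensional narray
b = Cumo::DFloat.new.fill(4.0)
# dot now returns a * b for the scalar-scalar case.
p a.dot(b)   # expected 12.0
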
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cumo
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-04-
+date: 2019-04-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray