tensor_stream-opencl 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +6 -1
- data/lib/tensor_stream/opencl/array_ops.rb +58 -55
- data/lib/tensor_stream/opencl/nn_ops.rb +57 -56
- data/lib/tensor_stream/opencl/opencl_buffer.rb +11 -6
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +49 -36
- data/lib/tensor_stream/opencl/version.rb +1 -1
- data/lib/tensor_stream/opencl.rb +1 -0
- data/samples/classify.rb +21 -0
- data/samples/dump_mnist.rb +21 -0
- data/samples/image_sort.rb +9 -0
- data/samples/mnist_data_2.3.rb +4 -12
- data/samples/mnist_data_3.0.rb +16 -8
- data/tensor_stream-opencl.gemspec +1 -1
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0e8de1676b30c21f9529cdce9d3fee406cdf2945d54f890ae49c14c1329860e
|
4
|
+
data.tar.gz: 66932db63589eedcd6247083a27344bed8c80860b6d096d01fb5b46db1b53521
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b905243d98976c94cb58dd443fc25f0b1e78bba689f20677ee3457dc44641d228642eb65adc4e58227d1feb0686fe29a3cabd8b25910f8e9c1aa0ef575ae8ff
|
7
|
+
data.tar.gz: 03ad5c5cd27ff058df206de8e109699fe0ef492e8ced1b97217cc0fe1ab7a4e43da5db45fed85b2e96b7bb35dc14465e39695b7995b3a1ccfcece7c050e0cae3
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -34,6 +34,7 @@ Metrics/CyclomaticComplexity:
|
|
34
34
|
Metrics/BlockLength:
|
35
35
|
Exclude:
|
36
36
|
- lib/tensor_stream/math_gradients.rb
|
37
|
+
- benchmark/benchmark.rb
|
37
38
|
|
38
39
|
Naming/AccessorMethodName:
|
39
40
|
Exclude:
|
@@ -86,4 +87,8 @@ Style/TrailingCommaInHashLiteral:
|
|
86
87
|
Naming/UncommunicativeMethodParamName:
|
87
88
|
Exclude:
|
88
89
|
- lib/tensor_stream/evaluator/ruby_evaluator.rb
|
89
|
-
- lib/tensor_stream/ops.rb
|
90
|
+
- lib/tensor_stream/ops.rb
|
91
|
+
|
92
|
+
Style/BlockDelimiters:
|
93
|
+
Exclude:
|
94
|
+
- benchmark/benchmark.rb
|
@@ -103,26 +103,27 @@ module TensorStream
|
|
103
103
|
end
|
104
104
|
else
|
105
105
|
raise TensorStream::ValueError, "#{num_split} does not divide #{value_shape[axis]} evenly" if num_split.reduce(:+) != value_shape[axis]
|
106
|
+
|
106
107
|
# compute shapes of individual output buffers
|
107
108
|
new_shapes = num_split.each_with_index.collect do |num, index|
|
108
109
|
new_shape = value_shape.dup
|
109
110
|
new_shape[axis] = num
|
110
111
|
new_shape
|
111
112
|
end
|
113
|
+
out = []
|
114
|
+
|
112
115
|
if axis.zero? # axis zero fast copy path
|
113
116
|
start = 0
|
114
|
-
|
115
|
-
new_shapes.each_with_index do |
|
116
|
-
element_count =
|
117
|
+
|
118
|
+
new_shapes.each_with_index do |ns, index|
|
119
|
+
element_count = ns.reduce(:*) || 1
|
117
120
|
region_size_in_bytes = element_count * value.buffer.element_size
|
118
|
-
out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type,
|
121
|
+
out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{ns.join('.')}")
|
119
122
|
start += region_size_in_bytes
|
120
123
|
end
|
121
|
-
out
|
122
124
|
else
|
123
125
|
# create buffers for each piece
|
124
126
|
work_buffer = _create_result_buffer(tensor.data_type, value_shape, "#{tensor.name}/out")
|
125
|
-
out = []
|
126
127
|
start = 0
|
127
128
|
|
128
129
|
steps = num_split.dup.reverse.drop(1).inject([0]) do |a, s|
|
@@ -157,14 +158,15 @@ module TensorStream
|
|
157
158
|
event_wait_list: event_wait_list)
|
158
159
|
end
|
159
160
|
work_buffer.op = events
|
160
|
-
new_shapes.each_with_index do |
|
161
|
-
element_count =
|
161
|
+
new_shapes.each_with_index do |ns, index|
|
162
|
+
element_count = ns.reduce(:*) || 1
|
162
163
|
region_size_in_bytes = element_count * work_buffer.buffer.element_size
|
163
|
-
out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type,
|
164
|
+
out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
|
164
165
|
start += region_size_in_bytes
|
165
166
|
end
|
166
|
-
out
|
167
167
|
end
|
168
|
+
|
169
|
+
out
|
168
170
|
end
|
169
171
|
|
170
172
|
TensorStream::Evaluator::OutputGroup.new(outputs, outputs.map(&:data_type))
|
@@ -195,58 +197,57 @@ module TensorStream
|
|
195
197
|
|
196
198
|
output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
|
197
199
|
ops = if axis.zero? # fast path
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
start = index * input.buffer.size * input.buffer.element_size
|
202
|
-
region = [input.buffer.size * input.buffer.element_size, 1, 1]
|
203
|
-
event_wait_list = build_event_wait_list(input)
|
204
|
-
_opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
|
205
|
-
region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
|
206
|
-
end.compact
|
207
|
-
else
|
208
|
-
elem_size = shape.empty? ? 1 : shape.reduce(:*)
|
209
|
-
cl_n = OpenCL::Int1.new(elem_size)
|
200
|
+
inputs.each_with_index.map do |input, index|
|
201
|
+
next if input.empty_value?
|
210
202
|
|
211
|
-
|
212
|
-
|
213
|
-
|
203
|
+
start = index * input.buffer.size * input.buffer.element_size
|
204
|
+
region = [input.buffer.size * input.buffer.element_size, 1, 1]
|
205
|
+
event_wait_list = build_event_wait_list(input)
|
206
|
+
_opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
|
207
|
+
region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
|
208
|
+
end.compact
|
209
|
+
else
|
210
|
+
elem_size = shape.empty? ? 1 : shape.reduce(:*)
|
211
|
+
cl_n = OpenCL::Int1.new(elem_size)
|
214
212
|
|
215
|
-
|
216
|
-
|
213
|
+
steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
|
214
|
+
a << shape[axis] + a.last
|
215
|
+
end
|
216
|
+
|
217
|
+
work_group = [elem_size]
|
218
|
+
event_wait_list = build_event_wait_list(inputs)
|
219
|
+
|
220
|
+
inputs.each_with_index.map do |input, index|
|
221
|
+
cl_index = OpenCL::Int1.new(index)
|
222
|
+
step = OpenCL::Int1.new(steps[index])
|
223
|
+
_cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
|
224
|
+
concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
|
225
|
+
output_buffer.cl_buffer, event_wait_list: event_wait_list)
|
226
|
+
end
|
227
|
+
end
|
217
228
|
|
218
|
-
inputs.each_with_index.map do |input, index|
|
219
|
-
cl_index = OpenCL::Int1.new(index)
|
220
|
-
step = OpenCL::Int1.new(steps[index])
|
221
|
-
_cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
|
222
|
-
concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
|
223
|
-
output_buffer.cl_buffer, event_wait_list: event_wait_list)
|
224
|
-
end
|
225
|
-
end
|
226
229
|
output_buffer.op = ops
|
227
230
|
output_buffer
|
228
231
|
end
|
229
232
|
|
230
|
-
register_op :squeeze do |
|
233
|
+
register_op :squeeze do |_context, tensor, inputs|
|
231
234
|
arr = inputs[0]
|
232
235
|
shape = inputs[0].shape.dup
|
233
236
|
axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
|
234
237
|
if !axis.empty?
|
235
|
-
axis.each do |
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
|
240
|
-
end
|
238
|
+
axis.each do |x|
|
239
|
+
raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1" unless shape[x] == 1
|
240
|
+
|
241
|
+
shape[x] = nil
|
241
242
|
end
|
242
243
|
else
|
243
244
|
shape = shape.map { |s| s == 1 ? nil : s }
|
244
245
|
end
|
245
246
|
|
246
247
|
OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
|
247
|
-
|
248
|
-
|
249
|
-
|
248
|
+
shape: shape.compact, buffer: arr.buffer,
|
249
|
+
cl_buffer: arr.cl_buffer,
|
250
|
+
op: arr.op)
|
250
251
|
end
|
251
252
|
|
252
253
|
register_op :stack do |_context, tensor, inputs|
|
@@ -312,7 +313,6 @@ module TensorStream
|
|
312
313
|
a << s * a.last
|
313
314
|
end.reverse
|
314
315
|
|
315
|
-
step = multipliers[0]
|
316
316
|
sub_shape = new_shape.dup
|
317
317
|
sub_shape.shift
|
318
318
|
|
@@ -375,9 +375,9 @@ module TensorStream
|
|
375
375
|
end
|
376
376
|
|
377
377
|
OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
|
378
|
-
|
379
|
-
|
380
|
-
|
378
|
+
shape: shape, buffer: arr.buffer,
|
379
|
+
cl_buffer: arr.cl_buffer,
|
380
|
+
op: arr.op)
|
381
381
|
end
|
382
382
|
|
383
383
|
register_op :transpose, buffer: true do |_context, tensor, inputs|
|
@@ -407,7 +407,10 @@ module TensorStream
|
|
407
407
|
|
408
408
|
shape = input_a.shape
|
409
409
|
|
410
|
-
slice_param = input_b.zip(size).collect.with_index
|
410
|
+
slice_param = input_b.zip(size).collect.with_index do |p, index|
|
411
|
+
p[1] = p[1] == -1 ? shape[index] : p[1]
|
412
|
+
p[0]..p[0] + p[1] - 1
|
413
|
+
end.reverse
|
411
414
|
|
412
415
|
new_buf = input_a.buffer.reshape(*input_a.shape.reverse)
|
413
416
|
sliced = new_buf.slice[*slice_param]
|
@@ -423,11 +426,11 @@ module TensorStream
|
|
423
426
|
if a.data_type != tensor.data_type
|
424
427
|
buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
|
425
428
|
work_group = if inputs[0].shape.size > 2
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
429
|
+
[inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
|
430
|
+
else
|
431
|
+
m, n = inputs[0].shape
|
432
|
+
[m || 1, n || 1]
|
433
|
+
end
|
431
434
|
|
432
435
|
cl_m = OpenCL::Int1.new(work_group[0])
|
433
436
|
cl_n = OpenCL::Int1.new(work_group[1])
|
@@ -11,12 +11,12 @@ module TensorStream
|
|
11
11
|
|
12
12
|
assign = tensor.inputs[0] || tensor
|
13
13
|
|
14
|
-
assign.
|
15
|
-
output_buffer = assign.
|
14
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
15
|
+
output_buffer = assign.container_buffer
|
16
16
|
|
17
17
|
work_group = [output_buffer.total_elements]
|
18
18
|
|
19
|
-
event_wait_list = build_event_wait_list([assign.
|
19
|
+
event_wait_list = build_event_wait_list([assign.container_buffer, learning_rate, delta])
|
20
20
|
|
21
21
|
event = call_program("apply_gradient", output_buffer.data_type,
|
22
22
|
work_group,
|
@@ -33,21 +33,21 @@ module TensorStream
|
|
33
33
|
|
34
34
|
assign = tensor.inputs[0] || tensor
|
35
35
|
assign_acc = tensor.inputs[1]
|
36
|
-
assign.
|
37
|
-
assign_acc.
|
36
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
37
|
+
assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
|
38
38
|
|
39
|
-
output_buffer = assign.
|
39
|
+
output_buffer = assign.container_buffer
|
40
40
|
|
41
41
|
work_group = [output_buffer.total_elements]
|
42
42
|
|
43
|
-
event_wait_list = build_event_wait_list([assign.
|
43
|
+
event_wait_list = build_event_wait_list([assign.container_buffer, assign_acc.container_buffer, learning_rate, grad, momentum])
|
44
44
|
method_call = :"apply_momentum_#{output_buffer.data_type}"
|
45
45
|
event = _cl_program("apply_momentum", nesterov: tensor.options[:use_nesterov], dtype: output_buffer.data_type).
|
46
46
|
send(method_call, _opencl_queue, work_group, grad.cl_buffer,
|
47
47
|
learning_rate.cl_buffer, momentum.cl_buffer, output_buffer.cl_buffer,
|
48
|
-
assign_acc.
|
48
|
+
assign_acc.container_buffer.cl_buffer, event_wait_list: event_wait_list)
|
49
49
|
output_buffer.op = event
|
50
|
-
assign_acc.
|
50
|
+
assign_acc.container_buffer.op = event
|
51
51
|
output_buffer
|
52
52
|
end
|
53
53
|
|
@@ -58,11 +58,11 @@ module TensorStream
|
|
58
58
|
assign_acc_update = tensor.inputs[2]
|
59
59
|
|
60
60
|
# mark variable buffers as dirty
|
61
|
-
assign.
|
62
|
-
assign_acc.
|
63
|
-
assign_acc_update.
|
61
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
62
|
+
assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
|
63
|
+
assign_acc_update.container_buffer.dirty = true # force buffer copy when variable is read externally
|
64
64
|
|
65
|
-
output_buffer = assign.
|
65
|
+
output_buffer = assign.container_buffer
|
66
66
|
|
67
67
|
work_group = [output_buffer.total_elements]
|
68
68
|
|
@@ -73,13 +73,13 @@ module TensorStream
|
|
73
73
|
rho.cl_buffer,
|
74
74
|
epsilon.cl_buffer,
|
75
75
|
grad.cl_buffer,
|
76
|
-
assign.
|
77
|
-
assign_acc.
|
78
|
-
assign_acc_update.
|
76
|
+
assign.container_buffer.cl_buffer,
|
77
|
+
assign_acc.container_buffer.cl_buffer,
|
78
|
+
assign_acc_update.container_buffer.cl_buffer,
|
79
79
|
event_wait_list: event_wait_list)
|
80
80
|
output_buffer.op = event
|
81
|
-
assign_acc.
|
82
|
-
assign_acc_update.
|
81
|
+
assign_acc.container_buffer.op = event
|
82
|
+
assign_acc_update.container_buffer.op = event
|
83
83
|
output_buffer
|
84
84
|
end
|
85
85
|
|
@@ -92,11 +92,11 @@ module TensorStream
|
|
92
92
|
assign_v = tensor.inputs[2]
|
93
93
|
|
94
94
|
# mark variable buffers as dirty
|
95
|
-
assign.
|
96
|
-
assign_m.
|
97
|
-
assign_v.
|
95
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
96
|
+
assign_m.container_buffer.dirty = true # force buffer copy when variable is read externally
|
97
|
+
assign_v.container_buffer.dirty = true # force buffer copy when variable is read externally
|
98
98
|
|
99
|
-
output_buffer = assign.
|
99
|
+
output_buffer = assign.container_buffer
|
100
100
|
|
101
101
|
work_group = [output_buffer.total_elements]
|
102
102
|
|
@@ -110,13 +110,13 @@ module TensorStream
|
|
110
110
|
beta1_t.cl_buffer,
|
111
111
|
beta2_t.cl_buffer,
|
112
112
|
epsilon_t.cl_buffer,
|
113
|
-
assign_m.
|
114
|
-
assign.
|
115
|
-
assign_v.
|
113
|
+
assign_m.container_buffer.cl_buffer,
|
114
|
+
assign.container_buffer.cl_buffer,
|
115
|
+
assign_v.container_buffer.cl_buffer,
|
116
116
|
event_wait_list: event_wait_list)
|
117
117
|
output_buffer.op = event
|
118
|
-
assign_m.
|
119
|
-
assign_v.
|
118
|
+
assign_m.container_buffer.op = event
|
119
|
+
assign_v.container_buffer.op = event
|
120
120
|
output_buffer
|
121
121
|
end
|
122
122
|
|
@@ -126,9 +126,9 @@ module TensorStream
|
|
126
126
|
assign = tensor.inputs[0] || tensor
|
127
127
|
assign_acc = tensor.inputs[1]
|
128
128
|
|
129
|
-
assign.
|
130
|
-
assign_acc.
|
131
|
-
output_buffer = assign.
|
129
|
+
assign.container_buffer.dirty = true
|
130
|
+
assign_acc.container_buffer.dirty = true
|
131
|
+
output_buffer = assign.container_buffer
|
132
132
|
|
133
133
|
work_group = [output_buffer.total_elements]
|
134
134
|
|
@@ -138,11 +138,11 @@ module TensorStream
|
|
138
138
|
work_group,
|
139
139
|
lr.cl_buffer,
|
140
140
|
grad.cl_buffer,
|
141
|
-
assign.
|
142
|
-
assign_acc.
|
141
|
+
assign.container_buffer.cl_buffer,
|
142
|
+
assign_acc.container_buffer.cl_buffer,
|
143
143
|
event_wait_list: event_wait_list)
|
144
144
|
output_buffer.op = event
|
145
|
-
assign_acc.
|
145
|
+
assign_acc.container_buffer.op = event
|
146
146
|
output_buffer
|
147
147
|
end
|
148
148
|
|
@@ -154,11 +154,11 @@ module TensorStream
|
|
154
154
|
assign_ms = tensor.inputs[2]
|
155
155
|
assign_mom = tensor.inputs[3]
|
156
156
|
|
157
|
-
assign.
|
158
|
-
assign_mg.
|
159
|
-
assign_ms.
|
160
|
-
assign_mom.
|
161
|
-
output_buffer = assign.
|
157
|
+
assign.container_buffer.dirty = true
|
158
|
+
assign_mg.container_buffer.dirty = true
|
159
|
+
assign_ms.container_buffer.dirty = true
|
160
|
+
assign_mom.container_buffer.dirty = true
|
161
|
+
output_buffer = assign.container_buffer
|
162
162
|
event_wait_list = build_event_wait_list(inputs)
|
163
163
|
work_group = [output_buffer.total_elements]
|
164
164
|
|
@@ -168,30 +168,30 @@ module TensorStream
|
|
168
168
|
momentum.cl_buffer,
|
169
169
|
epsilon.cl_buffer,
|
170
170
|
grad.cl_buffer,
|
171
|
-
assign.
|
172
|
-
assign_ms.
|
173
|
-
assign_mg.
|
174
|
-
assign_mom.
|
171
|
+
assign.container_buffer.cl_buffer,
|
172
|
+
assign_ms.container_buffer.cl_buffer,
|
173
|
+
assign_mg.container_buffer.cl_buffer,
|
174
|
+
assign_mom.container_buffer.cl_buffer,
|
175
175
|
event_wait_list: event_wait_list)
|
176
176
|
|
177
177
|
output_buffer.op = event
|
178
|
-
assign_mg.
|
179
|
-
assign_ms.
|
180
|
-
assign_mom.
|
178
|
+
assign_mg.container_buffer.op = event
|
179
|
+
assign_ms.container_buffer.op = event
|
180
|
+
assign_mom.container_buffer.op = event
|
181
181
|
output_buffer
|
182
182
|
end
|
183
183
|
|
184
|
-
register_op :apply_rms_prop do |
|
184
|
+
register_op :apply_rms_prop do |_context, tensor, inputs|
|
185
185
|
var, ms, mom, lr, rho, momentum, epsilon, grad = inputs
|
186
186
|
|
187
187
|
assign = tensor.inputs[0]
|
188
188
|
assign_ms = tensor.inputs[1]
|
189
189
|
assign_mom = tensor.inputs[2]
|
190
190
|
|
191
|
-
assign.
|
192
|
-
assign_ms.
|
193
|
-
assign_mom.
|
194
|
-
output_buffer = assign.
|
191
|
+
assign.container_buffer.dirty = true
|
192
|
+
assign_ms.container_buffer.dirty = true
|
193
|
+
assign_mom.container_buffer.dirty = true
|
194
|
+
output_buffer = assign.container_buffer
|
195
195
|
event_wait_list = build_event_wait_list(inputs)
|
196
196
|
work_group = [output_buffer.total_elements]
|
197
197
|
|
@@ -202,14 +202,14 @@ module TensorStream
|
|
202
202
|
momentum.cl_buffer,
|
203
203
|
epsilon.cl_buffer,
|
204
204
|
grad.cl_buffer,
|
205
|
-
assign.
|
206
|
-
assign_ms.
|
207
|
-
assign_mom.
|
205
|
+
assign.container_buffer.cl_buffer,
|
206
|
+
assign_ms.container_buffer.cl_buffer,
|
207
|
+
assign_mom.container_buffer.cl_buffer,
|
208
208
|
event_wait_list: event_wait_list)
|
209
209
|
|
210
210
|
output_buffer.op = event
|
211
|
-
assign_ms.
|
212
|
-
assign_mom.
|
211
|
+
assign_ms.container_buffer.op = event
|
212
|
+
assign_mom.container_buffer.op = event
|
213
213
|
output_buffer
|
214
214
|
end
|
215
215
|
|
@@ -273,7 +273,7 @@ module TensorStream
|
|
273
273
|
output_buffer_backprop.op = event
|
274
274
|
|
275
275
|
loss = reduction(context, tensor, output_buffer, rank, :sum)
|
276
|
-
TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop],
|
276
|
+
TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
|
277
277
|
end
|
278
278
|
|
279
279
|
register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
|
@@ -370,6 +370,7 @@ module TensorStream
|
|
370
370
|
raise TensorStream::ValueError, " Current implementation does not yet support strides in the batch and depth dimensions." if strides[0] != 1 || strides[3] != 1
|
371
371
|
|
372
372
|
padding_option = tensor.options[:padding]
|
373
|
+
|
373
374
|
padding = conv2d_padding_options(padding_option, filter_shape, height, width, height_stride, width_stride)
|
374
375
|
event_wait_list = build_event_wait_list(inputs)
|
375
376
|
|
@@ -33,13 +33,18 @@ module TensorStream
|
|
33
33
|
end
|
34
34
|
|
35
35
|
if shape.empty?
|
36
|
-
return
|
37
|
-
|
38
|
-
|
36
|
+
return case data_type
|
37
|
+
when :string
|
38
|
+
buffer.to_s
|
39
|
+
when :boolean
|
40
|
+
buffer[0] != 0
|
41
|
+
else
|
42
|
+
buffer[0]
|
43
|
+
end
|
39
44
|
end
|
40
|
-
|
41
|
-
result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
|
42
|
-
data_type == :boolean ? process_function_op(result
|
45
|
+
|
46
|
+
result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
|
47
|
+
data_type == :boolean ? process_function_op(result) { |a, _b| a != 0 } : result
|
43
48
|
end
|
44
49
|
|
45
50
|
def self.nil_buffer(owner, name, data_type)
|
@@ -225,7 +225,6 @@ module TensorStream
|
|
225
225
|
def prepare_input(tensor, context, options = {})
|
226
226
|
return nil unless tensor
|
227
227
|
|
228
|
-
tensor = resolve_placeholder(tensor)
|
229
228
|
if options[:noop]
|
230
229
|
tensor
|
231
230
|
elsif options[:buffer]
|
@@ -329,30 +328,18 @@ module TensorStream
|
|
329
328
|
tensor = tensor.call if tensor.is_a?(Proc)
|
330
329
|
|
331
330
|
child_context = execution_context.dup
|
332
|
-
res = if
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
eval_operation(tensor, child_context)
|
337
|
-
end
|
338
|
-
elsif tensor.is_a?(Variable)
|
339
|
-
eval_variable(tensor, child_context)
|
340
|
-
elsif tensor.is_a?(Placeholder)
|
341
|
-
resolve_placeholder(tensor, child_context)
|
331
|
+
res = if !on_same_device?(tensor) # tensor is on another device or evaluator
|
332
|
+
perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
|
333
|
+
elsif tensor.is_a?(Operation)
|
334
|
+
eval_operation(tensor, child_context)
|
342
335
|
else
|
343
|
-
|
336
|
+
raise "invalid tensor type!"
|
344
337
|
end
|
338
|
+
|
345
339
|
execution_context.deep_merge!(returns: child_context[:returns])
|
346
340
|
res
|
347
341
|
end
|
348
342
|
|
349
|
-
def eval_variable(tensor, _child_context)
|
350
|
-
raise "variable #{tensor.name} not initalized" if tensor.value.nil? && (tensor.buffer.nil? || !tensor.buffer.dirty)
|
351
|
-
|
352
|
-
tensor.buffer = wrap_opencl(tensor, name: tensor.name) if tensor.buffer.nil?
|
353
|
-
tensor.buffer
|
354
|
-
end
|
355
|
-
|
356
343
|
register_op :no_op do |_context, _tensor, _inputs|
|
357
344
|
end
|
358
345
|
|
@@ -396,14 +383,14 @@ module TensorStream
|
|
396
383
|
end
|
397
384
|
|
398
385
|
%i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
|
399
|
-
register_op op do |
|
386
|
+
register_op op do |_context, tensor, inputs|
|
400
387
|
execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], 'cond')
|
401
388
|
end
|
402
389
|
end
|
403
390
|
|
404
391
|
register_op :where, noop: true do |context, tensor, inputs|
|
405
|
-
pred =
|
406
|
-
execute_cond_func('where', tensor, pred, inputs[
|
392
|
+
pred = inputs[0]
|
393
|
+
execute_cond_func('where', tensor, pred, inputs[1], inputs[2], context)
|
407
394
|
end
|
408
395
|
|
409
396
|
register_op :check_numerics, noop: true do |context, tensor, inputs|
|
@@ -455,10 +442,36 @@ module TensorStream
|
|
455
442
|
nil
|
456
443
|
end
|
457
444
|
|
445
|
+
register_op :const do |_context, tensor, inputs|
|
446
|
+
wrap_opencl(tensor.const_value, name: tensor.name, data_type: tensor.data_type)
|
447
|
+
end
|
448
|
+
|
458
449
|
register_op :size do |_context, tensor, inputs|
|
459
450
|
wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
|
460
451
|
end
|
461
452
|
|
453
|
+
register_op :restore_ts do |context, tensor, inputs|
|
454
|
+
inputs = inputs.dup
|
455
|
+
filename = inputs.shift
|
456
|
+
tensor_names = inputs
|
457
|
+
|
458
|
+
filename = read_final_result(complete_eval(filename, context))
|
459
|
+
tensor_names.map! { |n| read_final_result(complete_eval(n, context)) }
|
460
|
+
|
461
|
+
input_dump = YAML.safe_load(File.read(filename), [Symbol])
|
462
|
+
vars = tensor.graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
|
463
|
+
|
464
|
+
vars.select! { |v| input_dump['variables'].key?(v.name) && tensor_names.include?(v.name) }
|
465
|
+
vars.each do |variable|
|
466
|
+
data = TensorStream::Packer.unpack(Zlib::Inflate.inflate(Base64.decode64(input_dump['variables'][variable.name]['data'])), variable.data_type)
|
467
|
+
shape = input_dump['variables'][variable.name]['shape']
|
468
|
+
variable.buffer = convert_to_opencl(data, shape, data_type: variable.data_type, name: variable.name)
|
469
|
+
variable.value = TensorShape.reshape(data, shape)
|
470
|
+
end
|
471
|
+
|
472
|
+
nil
|
473
|
+
end
|
474
|
+
|
462
475
|
def eval_operation(tensor, child_context)
|
463
476
|
cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
|
464
477
|
return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
|
@@ -514,7 +527,7 @@ module TensorStream
|
|
514
527
|
# File.write('/home/jedld/workspace/tensor_stream/samples/error.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
|
515
528
|
|
516
529
|
# File.write('/Users/josephemmanueldayo/workspace/gradients.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
|
517
|
-
raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} :
|
530
|
+
raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : defined at #{tensor.source}"
|
518
531
|
end
|
519
532
|
|
520
533
|
def eval_tensor(tensor, child_context)
|
@@ -539,21 +552,21 @@ module TensorStream
|
|
539
552
|
assign = tensor.inputs[0] || tensor
|
540
553
|
buffer = complete_eval(b, child_context)
|
541
554
|
|
542
|
-
if assign.
|
543
|
-
event_wait_list = build_event_wait_list([buffer, assign.
|
544
|
-
assign.
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
555
|
+
if assign.container_buffer
|
556
|
+
event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
|
557
|
+
assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
|
558
|
+
_opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
|
559
|
+
else
|
560
|
+
buffer.op
|
561
|
+
end
|
549
562
|
else
|
550
563
|
value = read_final_result(buffer)
|
551
|
-
assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
|
552
|
-
assign.value = value
|
564
|
+
assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
|
565
|
+
assign.options[:container].value = value
|
553
566
|
end
|
554
567
|
|
555
|
-
assign.
|
556
|
-
assign.
|
568
|
+
assign.container_buffer.dirty = true
|
569
|
+
assign.container_buffer
|
557
570
|
end
|
558
571
|
|
559
572
|
def execute_2_operand_func(op_name, tensor, a, b, prog_name = nil)
|
@@ -572,7 +585,7 @@ module TensorStream
|
|
572
585
|
[m || 1, n || 1]
|
573
586
|
elsif (b.shape.size == 1) && (result_shape.last == b.shape.last)
|
574
587
|
last_dim = b.shape.last
|
575
|
-
[result_shape.reduce(:*) / last_dim, last_dim]
|
588
|
+
[result_shape.reduce(:*) / last_dim, last_dim]
|
576
589
|
else
|
577
590
|
raise "rank > 2 not supported for now"
|
578
591
|
end
|
@@ -622,7 +635,7 @@ module TensorStream
|
|
622
635
|
work_group = if p.shape.size > 2
|
623
636
|
[m, p.shape.reduce(:*) / m]
|
624
637
|
else
|
625
|
-
[
|
638
|
+
[m || 1, n || 1]
|
626
639
|
end
|
627
640
|
|
628
641
|
cl_m = OpenCL::Int1.new(work_group[0])
|
data/lib/tensor_stream/opencl.rb
CHANGED
data/samples/classify.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "tensor_stream"
|
5
|
+
require 'mnist-learn'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
file_path = ARGV[0]
|
9
|
+
model_path = ARGV[1]
|
10
|
+
|
11
|
+
decoded_image = TensorStream.image.decode_png(File.read(file_path), channels: 1)
|
12
|
+
target_graph = TensorStream::YamlLoader.new.load_from_file(model_path)
|
13
|
+
input = target_graph['Placeholder']
|
14
|
+
output = TensorStream.argmax(target_graph['out'], 1)
|
15
|
+
sess = TensorStream.session
|
16
|
+
|
17
|
+
reshaped_image = 255.0.t - decoded_image.reshape([1, 28, 28, 1]).cast(:float32)
|
18
|
+
result = sess.run(output, feed_dict: { input => reshaped_image})
|
19
|
+
|
20
|
+
puts "image is a #{result.first}"
|
21
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "tensor_stream"
|
5
|
+
require 'mnist-learn'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
|
9
|
+
|
10
|
+
ts = TensorStream
|
11
|
+
test_data = mnist.test.images
|
12
|
+
FileUtils.mkdir_p 'test_images'
|
13
|
+
|
14
|
+
sess = ts.session
|
15
|
+
|
16
|
+
test_data.each_with_index do |image , index|
|
17
|
+
image = 255.t - ts.cast(ts.reshape(image, [28, 28, 1]), :uint8) # reshape image
|
18
|
+
encoder = ts.image.encode_png(image)
|
19
|
+
blob = sess.run(encoder)
|
20
|
+
File.write(File.join('test_images', "#{index}_image.png"), blob)
|
21
|
+
end
|
data/samples/mnist_data_2.3.rb
CHANGED
@@ -54,18 +54,10 @@ b5 = tf.variable(tf.zeros([10]))
|
|
54
54
|
x_ = tf.reshape(x, [-1, 784])
|
55
55
|
|
56
56
|
y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
|
63
|
-
y3d = tf.nn.dropout(y3, pkeep)
|
64
|
-
|
65
|
-
y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
|
66
|
-
y4d = tf.nn.dropout(y4, pkeep)
|
67
|
-
|
68
|
-
ylogits = tf.matmul(y4d, w5) + b5
|
57
|
+
y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
|
58
|
+
y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
|
59
|
+
y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
|
60
|
+
ylogits = tf.matmul(y4, w5) + b5
|
69
61
|
|
70
62
|
# model
|
71
63
|
y = tf.nn.softmax(ylogits)
|
data/samples/mnist_data_3.0.rb
CHANGED
@@ -10,6 +10,7 @@ require "bundler/setup"
|
|
10
10
|
require 'tensor_stream'
|
11
11
|
require 'mnist-learn'
|
12
12
|
require 'pry-byebug'
|
13
|
+
require 'csv'
|
13
14
|
|
14
15
|
# Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
|
15
16
|
require 'tensor_stream/opencl'
|
@@ -21,6 +22,7 @@ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Ope
|
|
21
22
|
puts "downloading minst data"
|
22
23
|
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
|
23
24
|
mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
|
25
|
+
|
24
26
|
puts "downloading finished"
|
25
27
|
|
26
28
|
# neural network structure for this sample:
|
@@ -85,13 +87,10 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
|
|
85
87
|
yy = tf.reshape(y3, [-1, 7 * 7 * M])
|
86
88
|
y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
|
87
89
|
|
88
|
-
|
89
|
-
yy4 = tf.nn.dropout(y4, pkeep)
|
90
|
-
|
91
|
-
ylogits = tf.matmul(yy4, w5) + b5
|
90
|
+
ylogits = tf.matmul(y4, w5) + b5
|
92
91
|
|
93
92
|
# model
|
94
|
-
y = tf.nn.softmax(ylogits)
|
93
|
+
y = tf.nn.softmax(ylogits, name: 'out')
|
95
94
|
|
96
95
|
|
97
96
|
|
@@ -111,16 +110,21 @@ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
|
|
111
110
|
lr = 0.0001.t + tf.train.exponential_decay(0.003, step, 2000, 1/Math::E)
|
112
111
|
train_step = TensorStream::Train::AdamOptimizer.new(lr).minimize(cross_entropy)
|
113
112
|
|
114
|
-
sess = tf.session
|
113
|
+
sess = tf.session(profile_enabled: true)
|
115
114
|
# Add ops to save and restore all the variables.
|
116
115
|
|
117
116
|
init = tf.global_variables_initializer
|
118
117
|
|
119
118
|
sess.run(init)
|
119
|
+
|
120
|
+
#Setup save and restore
|
121
|
+
model_save_path = "test_models/mnist_data_3.0"
|
122
|
+
saver = tf::Train::Saver.new
|
123
|
+
saver.restore(sess, model_save_path)
|
124
|
+
|
120
125
|
mnist_train = mnist.train
|
121
126
|
test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
|
122
127
|
|
123
|
-
|
124
128
|
(0..10001).each do |i|
|
125
129
|
# load batch of images and correct answers
|
126
130
|
batch_x, batch_y = mnist_train.next_batch(100)
|
@@ -130,7 +134,8 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
|
|
130
134
|
sess.run(train_step, feed_dict: train_data)
|
131
135
|
|
132
136
|
if (i % 10 == 0)
|
133
|
-
#
|
137
|
+
# result = TensorStream::ReportTool.profile_for(sess)
|
138
|
+
# File.write("profile.csv", result.map(&:to_csv).join("\n"))
|
134
139
|
# success? add code to print it
|
135
140
|
a_train, c_train, l = sess.run([accuracy, cross_entropy, lr], feed_dict: { x => batch_x, y_ => batch_y, step => i, pkeep => 1.0})
|
136
141
|
puts "#{i}: accuracy:#{a_train} loss:#{c_train} (lr:#{l})"
|
@@ -140,6 +145,9 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
|
|
140
145
|
# success on test data?
|
141
146
|
a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data, pkeep => 1.0)
|
142
147
|
puts("#{i}: ******** test accuracy: #{a_test} test loss: #{c_test}")
|
148
|
+
|
149
|
+
# save current state of the model
|
150
|
+
save_path = saver.save(sess, model_save_path)
|
143
151
|
end
|
144
152
|
end
|
145
153
|
|
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency "awesome_print"
|
40
40
|
spec.add_development_dependency "mnist-learn"
|
41
41
|
spec.add_development_dependency "simplecov"
|
42
|
-
spec.add_dependency "tensor_stream", "
|
42
|
+
spec.add_dependency "tensor_stream", "1.0.0-rc1"
|
43
43
|
spec.add_dependency "opencl_ruby_ffi"
|
44
44
|
spec.add_dependency "oily_png"
|
45
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,16 +112,16 @@ dependencies:
|
|
112
112
|
name: tensor_stream
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - '='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.
|
117
|
+
version: 1.0.0.pre.rc1
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - '='
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0.
|
124
|
+
version: 1.0.0.pre.rc1
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: opencl_ruby_ffi
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -246,6 +246,9 @@ files:
|
|
246
246
|
- lib/tensor_stream/opencl/opencl_evaluator.rb
|
247
247
|
- lib/tensor_stream/opencl/opencl_template_helper.rb
|
248
248
|
- lib/tensor_stream/opencl/version.rb
|
249
|
+
- samples/classify.rb
|
250
|
+
- samples/dump_mnist.rb
|
251
|
+
- samples/image_sort.rb
|
249
252
|
- samples/iris.data
|
250
253
|
- samples/iris.rb
|
251
254
|
- samples/logistic_regression.rb
|