tensor_stream-opencl 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.rubocop.yml +6 -1
- data/lib/tensor_stream/opencl/array_ops.rb +58 -55
- data/lib/tensor_stream/opencl/nn_ops.rb +57 -56
- data/lib/tensor_stream/opencl/opencl_buffer.rb +11 -6
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +49 -36
- data/lib/tensor_stream/opencl/version.rb +1 -1
- data/lib/tensor_stream/opencl.rb +1 -0
- data/samples/classify.rb +21 -0
- data/samples/dump_mnist.rb +21 -0
- data/samples/image_sort.rb +9 -0
- data/samples/mnist_data_2.3.rb +4 -12
- data/samples/mnist_data_3.0.rb +16 -8
- data/tensor_stream-opencl.gemspec +1 -1
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0e8de1676b30c21f9529cdce9d3fee406cdf2945d54f890ae49c14c1329860e
|
4
|
+
data.tar.gz: 66932db63589eedcd6247083a27344bed8c80860b6d096d01fb5b46db1b53521
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b905243d98976c94cb58dd443fc25f0b1e78bba689f20677ee3457dc44641d228642eb65adc4e58227d1feb0686fe29a3cabd8b25910f8e9c1aa0ef575ae8ff
|
7
|
+
data.tar.gz: 03ad5c5cd27ff058df206de8e109699fe0ef492e8ced1b97217cc0fe1ab7a4e43da5db45fed85b2e96b7bb35dc14465e39695b7995b3a1ccfcece7c050e0cae3
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -34,6 +34,7 @@ Metrics/CyclomaticComplexity:
|
|
34
34
|
Metrics/BlockLength:
|
35
35
|
Exclude:
|
36
36
|
- lib/tensor_stream/math_gradients.rb
|
37
|
+
- benchmark/benchmark.rb
|
37
38
|
|
38
39
|
Naming/AccessorMethodName:
|
39
40
|
Exclude:
|
@@ -86,4 +87,8 @@ Style/TrailingCommaInHashLiteral:
|
|
86
87
|
Naming/UncommunicativeMethodParamName:
|
87
88
|
Exclude:
|
88
89
|
- lib/tensor_stream/evaluator/ruby_evaluator.rb
|
89
|
-
- lib/tensor_stream/ops.rb
|
90
|
+
- lib/tensor_stream/ops.rb
|
91
|
+
|
92
|
+
Style/BlockDelimiters:
|
93
|
+
Exclude:
|
94
|
+
- benchmark/benchmark.rb
|
@@ -103,26 +103,27 @@ module TensorStream
|
|
103
103
|
end
|
104
104
|
else
|
105
105
|
raise TensorStream::ValueError, "#{num_split} does not divide #{value_shape[axis]} evenly" if num_split.reduce(:+) != value_shape[axis]
|
106
|
+
|
106
107
|
# compute shapes of individual output buffers
|
107
108
|
new_shapes = num_split.each_with_index.collect do |num, index|
|
108
109
|
new_shape = value_shape.dup
|
109
110
|
new_shape[axis] = num
|
110
111
|
new_shape
|
111
112
|
end
|
113
|
+
out = []
|
114
|
+
|
112
115
|
if axis.zero? # axis zero fast copy path
|
113
116
|
start = 0
|
114
|
-
|
115
|
-
new_shapes.each_with_index do |
|
116
|
-
element_count =
|
117
|
+
|
118
|
+
new_shapes.each_with_index do |ns, index|
|
119
|
+
element_count = ns.reduce(:*) || 1
|
117
120
|
region_size_in_bytes = element_count * value.buffer.element_size
|
118
|
-
out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type,
|
121
|
+
out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{ns.join('.')}")
|
119
122
|
start += region_size_in_bytes
|
120
123
|
end
|
121
|
-
out
|
122
124
|
else
|
123
125
|
# create buffers for each piece
|
124
126
|
work_buffer = _create_result_buffer(tensor.data_type, value_shape, "#{tensor.name}/out")
|
125
|
-
out = []
|
126
127
|
start = 0
|
127
128
|
|
128
129
|
steps = num_split.dup.reverse.drop(1).inject([0]) do |a, s|
|
@@ -157,14 +158,15 @@ module TensorStream
|
|
157
158
|
event_wait_list: event_wait_list)
|
158
159
|
end
|
159
160
|
work_buffer.op = events
|
160
|
-
new_shapes.each_with_index do |
|
161
|
-
element_count =
|
161
|
+
new_shapes.each_with_index do |ns, index|
|
162
|
+
element_count = ns.reduce(:*) || 1
|
162
163
|
region_size_in_bytes = element_count * work_buffer.buffer.element_size
|
163
|
-
out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type,
|
164
|
+
out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
|
164
165
|
start += region_size_in_bytes
|
165
166
|
end
|
166
|
-
out
|
167
167
|
end
|
168
|
+
|
169
|
+
out
|
168
170
|
end
|
169
171
|
|
170
172
|
TensorStream::Evaluator::OutputGroup.new(outputs, outputs.map(&:data_type))
|
@@ -195,58 +197,57 @@ module TensorStream
|
|
195
197
|
|
196
198
|
output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
|
197
199
|
ops = if axis.zero? # fast path
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
start = index * input.buffer.size * input.buffer.element_size
|
202
|
-
region = [input.buffer.size * input.buffer.element_size, 1, 1]
|
203
|
-
event_wait_list = build_event_wait_list(input)
|
204
|
-
_opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
|
205
|
-
region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
|
206
|
-
end.compact
|
207
|
-
else
|
208
|
-
elem_size = shape.empty? ? 1 : shape.reduce(:*)
|
209
|
-
cl_n = OpenCL::Int1.new(elem_size)
|
200
|
+
inputs.each_with_index.map do |input, index|
|
201
|
+
next if input.empty_value?
|
210
202
|
|
211
|
-
|
212
|
-
|
213
|
-
|
203
|
+
start = index * input.buffer.size * input.buffer.element_size
|
204
|
+
region = [input.buffer.size * input.buffer.element_size, 1, 1]
|
205
|
+
event_wait_list = build_event_wait_list(input)
|
206
|
+
_opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
|
207
|
+
region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
|
208
|
+
end.compact
|
209
|
+
else
|
210
|
+
elem_size = shape.empty? ? 1 : shape.reduce(:*)
|
211
|
+
cl_n = OpenCL::Int1.new(elem_size)
|
214
212
|
|
215
|
-
|
216
|
-
|
213
|
+
steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
|
214
|
+
a << shape[axis] + a.last
|
215
|
+
end
|
216
|
+
|
217
|
+
work_group = [elem_size]
|
218
|
+
event_wait_list = build_event_wait_list(inputs)
|
219
|
+
|
220
|
+
inputs.each_with_index.map do |input, index|
|
221
|
+
cl_index = OpenCL::Int1.new(index)
|
222
|
+
step = OpenCL::Int1.new(steps[index])
|
223
|
+
_cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
|
224
|
+
concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
|
225
|
+
output_buffer.cl_buffer, event_wait_list: event_wait_list)
|
226
|
+
end
|
227
|
+
end
|
217
228
|
|
218
|
-
inputs.each_with_index.map do |input, index|
|
219
|
-
cl_index = OpenCL::Int1.new(index)
|
220
|
-
step = OpenCL::Int1.new(steps[index])
|
221
|
-
_cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
|
222
|
-
concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
|
223
|
-
output_buffer.cl_buffer, event_wait_list: event_wait_list)
|
224
|
-
end
|
225
|
-
end
|
226
229
|
output_buffer.op = ops
|
227
230
|
output_buffer
|
228
231
|
end
|
229
232
|
|
230
|
-
register_op :squeeze do |
|
233
|
+
register_op :squeeze do |_context, tensor, inputs|
|
231
234
|
arr = inputs[0]
|
232
235
|
shape = inputs[0].shape.dup
|
233
236
|
axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
|
234
237
|
if !axis.empty?
|
235
|
-
axis.each do |
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
|
240
|
-
end
|
238
|
+
axis.each do |x|
|
239
|
+
raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1" unless shape[x] == 1
|
240
|
+
|
241
|
+
shape[x] = nil
|
241
242
|
end
|
242
243
|
else
|
243
244
|
shape = shape.map { |s| s == 1 ? nil : s }
|
244
245
|
end
|
245
246
|
|
246
247
|
OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
|
247
|
-
|
248
|
-
|
249
|
-
|
248
|
+
shape: shape.compact, buffer: arr.buffer,
|
249
|
+
cl_buffer: arr.cl_buffer,
|
250
|
+
op: arr.op)
|
250
251
|
end
|
251
252
|
|
252
253
|
register_op :stack do |_context, tensor, inputs|
|
@@ -312,7 +313,6 @@ module TensorStream
|
|
312
313
|
a << s * a.last
|
313
314
|
end.reverse
|
314
315
|
|
315
|
-
step = multipliers[0]
|
316
316
|
sub_shape = new_shape.dup
|
317
317
|
sub_shape.shift
|
318
318
|
|
@@ -375,9 +375,9 @@ module TensorStream
|
|
375
375
|
end
|
376
376
|
|
377
377
|
OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
|
378
|
-
|
379
|
-
|
380
|
-
|
378
|
+
shape: shape, buffer: arr.buffer,
|
379
|
+
cl_buffer: arr.cl_buffer,
|
380
|
+
op: arr.op)
|
381
381
|
end
|
382
382
|
|
383
383
|
register_op :transpose, buffer: true do |_context, tensor, inputs|
|
@@ -407,7 +407,10 @@ module TensorStream
|
|
407
407
|
|
408
408
|
shape = input_a.shape
|
409
409
|
|
410
|
-
slice_param = input_b.zip(size).collect.with_index
|
410
|
+
slice_param = input_b.zip(size).collect.with_index do |p, index|
|
411
|
+
p[1] = p[1] == -1 ? shape[index] : p[1]
|
412
|
+
p[0]..p[0] + p[1] - 1
|
413
|
+
end.reverse
|
411
414
|
|
412
415
|
new_buf = input_a.buffer.reshape(*input_a.shape.reverse)
|
413
416
|
sliced = new_buf.slice[*slice_param]
|
@@ -423,11 +426,11 @@ module TensorStream
|
|
423
426
|
if a.data_type != tensor.data_type
|
424
427
|
buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
|
425
428
|
work_group = if inputs[0].shape.size > 2
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
429
|
+
[inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
|
430
|
+
else
|
431
|
+
m, n = inputs[0].shape
|
432
|
+
[m || 1, n || 1]
|
433
|
+
end
|
431
434
|
|
432
435
|
cl_m = OpenCL::Int1.new(work_group[0])
|
433
436
|
cl_n = OpenCL::Int1.new(work_group[1])
|
@@ -11,12 +11,12 @@ module TensorStream
|
|
11
11
|
|
12
12
|
assign = tensor.inputs[0] || tensor
|
13
13
|
|
14
|
-
assign.
|
15
|
-
output_buffer = assign.
|
14
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
15
|
+
output_buffer = assign.container_buffer
|
16
16
|
|
17
17
|
work_group = [output_buffer.total_elements]
|
18
18
|
|
19
|
-
event_wait_list = build_event_wait_list([assign.
|
19
|
+
event_wait_list = build_event_wait_list([assign.container_buffer, learning_rate, delta])
|
20
20
|
|
21
21
|
event = call_program("apply_gradient", output_buffer.data_type,
|
22
22
|
work_group,
|
@@ -33,21 +33,21 @@ module TensorStream
|
|
33
33
|
|
34
34
|
assign = tensor.inputs[0] || tensor
|
35
35
|
assign_acc = tensor.inputs[1]
|
36
|
-
assign.
|
37
|
-
assign_acc.
|
36
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
37
|
+
assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
|
38
38
|
|
39
|
-
output_buffer = assign.
|
39
|
+
output_buffer = assign.container_buffer
|
40
40
|
|
41
41
|
work_group = [output_buffer.total_elements]
|
42
42
|
|
43
|
-
event_wait_list = build_event_wait_list([assign.
|
43
|
+
event_wait_list = build_event_wait_list([assign.container_buffer, assign_acc.container_buffer, learning_rate, grad, momentum])
|
44
44
|
method_call = :"apply_momentum_#{output_buffer.data_type}"
|
45
45
|
event = _cl_program("apply_momentum", nesterov: tensor.options[:use_nesterov], dtype: output_buffer.data_type).
|
46
46
|
send(method_call, _opencl_queue, work_group, grad.cl_buffer,
|
47
47
|
learning_rate.cl_buffer, momentum.cl_buffer, output_buffer.cl_buffer,
|
48
|
-
assign_acc.
|
48
|
+
assign_acc.container_buffer.cl_buffer, event_wait_list: event_wait_list)
|
49
49
|
output_buffer.op = event
|
50
|
-
assign_acc.
|
50
|
+
assign_acc.container_buffer.op = event
|
51
51
|
output_buffer
|
52
52
|
end
|
53
53
|
|
@@ -58,11 +58,11 @@ module TensorStream
|
|
58
58
|
assign_acc_update = tensor.inputs[2]
|
59
59
|
|
60
60
|
# mark variable buffers as dirty
|
61
|
-
assign.
|
62
|
-
assign_acc.
|
63
|
-
assign_acc_update.
|
61
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
62
|
+
assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
|
63
|
+
assign_acc_update.container_buffer.dirty = true # force buffer copy when variable is read externally
|
64
64
|
|
65
|
-
output_buffer = assign.
|
65
|
+
output_buffer = assign.container_buffer
|
66
66
|
|
67
67
|
work_group = [output_buffer.total_elements]
|
68
68
|
|
@@ -73,13 +73,13 @@ module TensorStream
|
|
73
73
|
rho.cl_buffer,
|
74
74
|
epsilon.cl_buffer,
|
75
75
|
grad.cl_buffer,
|
76
|
-
assign.
|
77
|
-
assign_acc.
|
78
|
-
assign_acc_update.
|
76
|
+
assign.container_buffer.cl_buffer,
|
77
|
+
assign_acc.container_buffer.cl_buffer,
|
78
|
+
assign_acc_update.container_buffer.cl_buffer,
|
79
79
|
event_wait_list: event_wait_list)
|
80
80
|
output_buffer.op = event
|
81
|
-
assign_acc.
|
82
|
-
assign_acc_update.
|
81
|
+
assign_acc.container_buffer.op = event
|
82
|
+
assign_acc_update.container_buffer.op = event
|
83
83
|
output_buffer
|
84
84
|
end
|
85
85
|
|
@@ -92,11 +92,11 @@ module TensorStream
|
|
92
92
|
assign_v = tensor.inputs[2]
|
93
93
|
|
94
94
|
# mark variable buffers as dirty
|
95
|
-
assign.
|
96
|
-
assign_m.
|
97
|
-
assign_v.
|
95
|
+
assign.container_buffer.dirty = true # force buffer copy when variable is read externally
|
96
|
+
assign_m.container_buffer.dirty = true # force buffer copy when variable is read externally
|
97
|
+
assign_v.container_buffer.dirty = true # force buffer copy when variable is read externally
|
98
98
|
|
99
|
-
output_buffer = assign.
|
99
|
+
output_buffer = assign.container_buffer
|
100
100
|
|
101
101
|
work_group = [output_buffer.total_elements]
|
102
102
|
|
@@ -110,13 +110,13 @@ module TensorStream
|
|
110
110
|
beta1_t.cl_buffer,
|
111
111
|
beta2_t.cl_buffer,
|
112
112
|
epsilon_t.cl_buffer,
|
113
|
-
assign_m.
|
114
|
-
assign.
|
115
|
-
assign_v.
|
113
|
+
assign_m.container_buffer.cl_buffer,
|
114
|
+
assign.container_buffer.cl_buffer,
|
115
|
+
assign_v.container_buffer.cl_buffer,
|
116
116
|
event_wait_list: event_wait_list)
|
117
117
|
output_buffer.op = event
|
118
|
-
assign_m.
|
119
|
-
assign_v.
|
118
|
+
assign_m.container_buffer.op = event
|
119
|
+
assign_v.container_buffer.op = event
|
120
120
|
output_buffer
|
121
121
|
end
|
122
122
|
|
@@ -126,9 +126,9 @@ module TensorStream
|
|
126
126
|
assign = tensor.inputs[0] || tensor
|
127
127
|
assign_acc = tensor.inputs[1]
|
128
128
|
|
129
|
-
assign.
|
130
|
-
assign_acc.
|
131
|
-
output_buffer = assign.
|
129
|
+
assign.container_buffer.dirty = true
|
130
|
+
assign_acc.container_buffer.dirty = true
|
131
|
+
output_buffer = assign.container_buffer
|
132
132
|
|
133
133
|
work_group = [output_buffer.total_elements]
|
134
134
|
|
@@ -138,11 +138,11 @@ module TensorStream
|
|
138
138
|
work_group,
|
139
139
|
lr.cl_buffer,
|
140
140
|
grad.cl_buffer,
|
141
|
-
assign.
|
142
|
-
assign_acc.
|
141
|
+
assign.container_buffer.cl_buffer,
|
142
|
+
assign_acc.container_buffer.cl_buffer,
|
143
143
|
event_wait_list: event_wait_list)
|
144
144
|
output_buffer.op = event
|
145
|
-
assign_acc.
|
145
|
+
assign_acc.container_buffer.op = event
|
146
146
|
output_buffer
|
147
147
|
end
|
148
148
|
|
@@ -154,11 +154,11 @@ module TensorStream
|
|
154
154
|
assign_ms = tensor.inputs[2]
|
155
155
|
assign_mom = tensor.inputs[3]
|
156
156
|
|
157
|
-
assign.
|
158
|
-
assign_mg.
|
159
|
-
assign_ms.
|
160
|
-
assign_mom.
|
161
|
-
output_buffer = assign.
|
157
|
+
assign.container_buffer.dirty = true
|
158
|
+
assign_mg.container_buffer.dirty = true
|
159
|
+
assign_ms.container_buffer.dirty = true
|
160
|
+
assign_mom.container_buffer.dirty = true
|
161
|
+
output_buffer = assign.container_buffer
|
162
162
|
event_wait_list = build_event_wait_list(inputs)
|
163
163
|
work_group = [output_buffer.total_elements]
|
164
164
|
|
@@ -168,30 +168,30 @@ module TensorStream
|
|
168
168
|
momentum.cl_buffer,
|
169
169
|
epsilon.cl_buffer,
|
170
170
|
grad.cl_buffer,
|
171
|
-
assign.
|
172
|
-
assign_ms.
|
173
|
-
assign_mg.
|
174
|
-
assign_mom.
|
171
|
+
assign.container_buffer.cl_buffer,
|
172
|
+
assign_ms.container_buffer.cl_buffer,
|
173
|
+
assign_mg.container_buffer.cl_buffer,
|
174
|
+
assign_mom.container_buffer.cl_buffer,
|
175
175
|
event_wait_list: event_wait_list)
|
176
176
|
|
177
177
|
output_buffer.op = event
|
178
|
-
assign_mg.
|
179
|
-
assign_ms.
|
180
|
-
assign_mom.
|
178
|
+
assign_mg.container_buffer.op = event
|
179
|
+
assign_ms.container_buffer.op = event
|
180
|
+
assign_mom.container_buffer.op = event
|
181
181
|
output_buffer
|
182
182
|
end
|
183
183
|
|
184
|
-
register_op :apply_rms_prop do |
|
184
|
+
register_op :apply_rms_prop do |_context, tensor, inputs|
|
185
185
|
var, ms, mom, lr, rho, momentum, epsilon, grad = inputs
|
186
186
|
|
187
187
|
assign = tensor.inputs[0]
|
188
188
|
assign_ms = tensor.inputs[1]
|
189
189
|
assign_mom = tensor.inputs[2]
|
190
190
|
|
191
|
-
assign.
|
192
|
-
assign_ms.
|
193
|
-
assign_mom.
|
194
|
-
output_buffer = assign.
|
191
|
+
assign.container_buffer.dirty = true
|
192
|
+
assign_ms.container_buffer.dirty = true
|
193
|
+
assign_mom.container_buffer.dirty = true
|
194
|
+
output_buffer = assign.container_buffer
|
195
195
|
event_wait_list = build_event_wait_list(inputs)
|
196
196
|
work_group = [output_buffer.total_elements]
|
197
197
|
|
@@ -202,14 +202,14 @@ module TensorStream
|
|
202
202
|
momentum.cl_buffer,
|
203
203
|
epsilon.cl_buffer,
|
204
204
|
grad.cl_buffer,
|
205
|
-
assign.
|
206
|
-
assign_ms.
|
207
|
-
assign_mom.
|
205
|
+
assign.container_buffer.cl_buffer,
|
206
|
+
assign_ms.container_buffer.cl_buffer,
|
207
|
+
assign_mom.container_buffer.cl_buffer,
|
208
208
|
event_wait_list: event_wait_list)
|
209
209
|
|
210
210
|
output_buffer.op = event
|
211
|
-
assign_ms.
|
212
|
-
assign_mom.
|
211
|
+
assign_ms.container_buffer.op = event
|
212
|
+
assign_mom.container_buffer.op = event
|
213
213
|
output_buffer
|
214
214
|
end
|
215
215
|
|
@@ -273,7 +273,7 @@ module TensorStream
|
|
273
273
|
output_buffer_backprop.op = event
|
274
274
|
|
275
275
|
loss = reduction(context, tensor, output_buffer, rank, :sum)
|
276
|
-
TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop],
|
276
|
+
TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
|
277
277
|
end
|
278
278
|
|
279
279
|
register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
|
@@ -370,6 +370,7 @@ module TensorStream
|
|
370
370
|
raise TensorStream::ValueError, " Current implementation does not yet support strides in the batch and depth dimensions." if strides[0] != 1 || strides[3] != 1
|
371
371
|
|
372
372
|
padding_option = tensor.options[:padding]
|
373
|
+
|
373
374
|
padding = conv2d_padding_options(padding_option, filter_shape, height, width, height_stride, width_stride)
|
374
375
|
event_wait_list = build_event_wait_list(inputs)
|
375
376
|
|
@@ -33,13 +33,18 @@ module TensorStream
|
|
33
33
|
end
|
34
34
|
|
35
35
|
if shape.empty?
|
36
|
-
return
|
37
|
-
|
38
|
-
|
36
|
+
return case data_type
|
37
|
+
when :string
|
38
|
+
buffer.to_s
|
39
|
+
when :boolean
|
40
|
+
buffer[0] != 0
|
41
|
+
else
|
42
|
+
buffer[0]
|
43
|
+
end
|
39
44
|
end
|
40
|
-
|
41
|
-
result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
|
42
|
-
data_type == :boolean ? process_function_op(result
|
45
|
+
|
46
|
+
result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
|
47
|
+
data_type == :boolean ? process_function_op(result) { |a, _b| a != 0 } : result
|
43
48
|
end
|
44
49
|
|
45
50
|
def self.nil_buffer(owner, name, data_type)
|
@@ -225,7 +225,6 @@ module TensorStream
|
|
225
225
|
def prepare_input(tensor, context, options = {})
|
226
226
|
return nil unless tensor
|
227
227
|
|
228
|
-
tensor = resolve_placeholder(tensor)
|
229
228
|
if options[:noop]
|
230
229
|
tensor
|
231
230
|
elsif options[:buffer]
|
@@ -329,30 +328,18 @@ module TensorStream
|
|
329
328
|
tensor = tensor.call if tensor.is_a?(Proc)
|
330
329
|
|
331
330
|
child_context = execution_context.dup
|
332
|
-
res = if
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
eval_operation(tensor, child_context)
|
337
|
-
end
|
338
|
-
elsif tensor.is_a?(Variable)
|
339
|
-
eval_variable(tensor, child_context)
|
340
|
-
elsif tensor.is_a?(Placeholder)
|
341
|
-
resolve_placeholder(tensor, child_context)
|
331
|
+
res = if !on_same_device?(tensor) # tensor is on another device or evaluator
|
332
|
+
perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
|
333
|
+
elsif tensor.is_a?(Operation)
|
334
|
+
eval_operation(tensor, child_context)
|
342
335
|
else
|
343
|
-
|
336
|
+
raise "invalid tensor type!"
|
344
337
|
end
|
338
|
+
|
345
339
|
execution_context.deep_merge!(returns: child_context[:returns])
|
346
340
|
res
|
347
341
|
end
|
348
342
|
|
349
|
-
def eval_variable(tensor, _child_context)
|
350
|
-
raise "variable #{tensor.name} not initalized" if tensor.value.nil? && (tensor.buffer.nil? || !tensor.buffer.dirty)
|
351
|
-
|
352
|
-
tensor.buffer = wrap_opencl(tensor, name: tensor.name) if tensor.buffer.nil?
|
353
|
-
tensor.buffer
|
354
|
-
end
|
355
|
-
|
356
343
|
register_op :no_op do |_context, _tensor, _inputs|
|
357
344
|
end
|
358
345
|
|
@@ -396,14 +383,14 @@ module TensorStream
|
|
396
383
|
end
|
397
384
|
|
398
385
|
%i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
|
399
|
-
register_op op do |
|
386
|
+
register_op op do |_context, tensor, inputs|
|
400
387
|
execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], 'cond')
|
401
388
|
end
|
402
389
|
end
|
403
390
|
|
404
391
|
register_op :where, noop: true do |context, tensor, inputs|
|
405
|
-
pred =
|
406
|
-
execute_cond_func('where', tensor, pred, inputs[
|
392
|
+
pred = inputs[0]
|
393
|
+
execute_cond_func('where', tensor, pred, inputs[1], inputs[2], context)
|
407
394
|
end
|
408
395
|
|
409
396
|
register_op :check_numerics, noop: true do |context, tensor, inputs|
|
@@ -455,10 +442,36 @@ module TensorStream
|
|
455
442
|
nil
|
456
443
|
end
|
457
444
|
|
445
|
+
register_op :const do |_context, tensor, inputs|
|
446
|
+
wrap_opencl(tensor.const_value, name: tensor.name, data_type: tensor.data_type)
|
447
|
+
end
|
448
|
+
|
458
449
|
register_op :size do |_context, tensor, inputs|
|
459
450
|
wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
|
460
451
|
end
|
461
452
|
|
453
|
+
register_op :restore_ts do |context, tensor, inputs|
|
454
|
+
inputs = inputs.dup
|
455
|
+
filename = inputs.shift
|
456
|
+
tensor_names = inputs
|
457
|
+
|
458
|
+
filename = read_final_result(complete_eval(filename, context))
|
459
|
+
tensor_names.map! { |n| read_final_result(complete_eval(n, context)) }
|
460
|
+
|
461
|
+
input_dump = YAML.safe_load(File.read(filename), [Symbol])
|
462
|
+
vars = tensor.graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
|
463
|
+
|
464
|
+
vars.select! { |v| input_dump['variables'].key?(v.name) && tensor_names.include?(v.name) }
|
465
|
+
vars.each do |variable|
|
466
|
+
data = TensorStream::Packer.unpack(Zlib::Inflate.inflate(Base64.decode64(input_dump['variables'][variable.name]['data'])), variable.data_type)
|
467
|
+
shape = input_dump['variables'][variable.name]['shape']
|
468
|
+
variable.buffer = convert_to_opencl(data, shape, data_type: variable.data_type, name: variable.name)
|
469
|
+
variable.value = TensorShape.reshape(data, shape)
|
470
|
+
end
|
471
|
+
|
472
|
+
nil
|
473
|
+
end
|
474
|
+
|
462
475
|
def eval_operation(tensor, child_context)
|
463
476
|
cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
|
464
477
|
return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
|
@@ -514,7 +527,7 @@ module TensorStream
|
|
514
527
|
# File.write('/home/jedld/workspace/tensor_stream/samples/error.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
|
515
528
|
|
516
529
|
# File.write('/Users/josephemmanueldayo/workspace/gradients.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
|
517
|
-
raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} :
|
530
|
+
raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : defined at #{tensor.source}"
|
518
531
|
end
|
519
532
|
|
520
533
|
def eval_tensor(tensor, child_context)
|
@@ -539,21 +552,21 @@ module TensorStream
|
|
539
552
|
assign = tensor.inputs[0] || tensor
|
540
553
|
buffer = complete_eval(b, child_context)
|
541
554
|
|
542
|
-
if assign.
|
543
|
-
event_wait_list = build_event_wait_list([buffer, assign.
|
544
|
-
assign.
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
555
|
+
if assign.container_buffer
|
556
|
+
event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
|
557
|
+
assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
|
558
|
+
_opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
|
559
|
+
else
|
560
|
+
buffer.op
|
561
|
+
end
|
549
562
|
else
|
550
563
|
value = read_final_result(buffer)
|
551
|
-
assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
|
552
|
-
assign.value = value
|
564
|
+
assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
|
565
|
+
assign.options[:container].value = value
|
553
566
|
end
|
554
567
|
|
555
|
-
assign.
|
556
|
-
assign.
|
568
|
+
assign.container_buffer.dirty = true
|
569
|
+
assign.container_buffer
|
557
570
|
end
|
558
571
|
|
559
572
|
def execute_2_operand_func(op_name, tensor, a, b, prog_name = nil)
|
@@ -572,7 +585,7 @@ module TensorStream
|
|
572
585
|
[m || 1, n || 1]
|
573
586
|
elsif (b.shape.size == 1) && (result_shape.last == b.shape.last)
|
574
587
|
last_dim = b.shape.last
|
575
|
-
[result_shape.reduce(:*) / last_dim, last_dim]
|
588
|
+
[result_shape.reduce(:*) / last_dim, last_dim]
|
576
589
|
else
|
577
590
|
raise "rank > 2 not supported for now"
|
578
591
|
end
|
@@ -622,7 +635,7 @@ module TensorStream
|
|
622
635
|
work_group = if p.shape.size > 2
|
623
636
|
[m, p.shape.reduce(:*) / m]
|
624
637
|
else
|
625
|
-
[
|
638
|
+
[m || 1, n || 1]
|
626
639
|
end
|
627
640
|
|
628
641
|
cl_m = OpenCL::Int1.new(work_group[0])
|
data/lib/tensor_stream/opencl.rb
CHANGED
data/samples/classify.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "tensor_stream"
|
5
|
+
require 'mnist-learn'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
file_path = ARGV[0]
|
9
|
+
model_path = ARGV[1]
|
10
|
+
|
11
|
+
decoded_image = TensorStream.image.decode_png(File.read(file_path), channels: 1)
|
12
|
+
target_graph = TensorStream::YamlLoader.new.load_from_file(model_path)
|
13
|
+
input = target_graph['Placeholder']
|
14
|
+
output = TensorStream.argmax(target_graph['out'], 1)
|
15
|
+
sess = TensorStream.session
|
16
|
+
|
17
|
+
reshaped_image = 255.0.t - decoded_image.reshape([1, 28, 28, 1]).cast(:float32)
|
18
|
+
result = sess.run(output, feed_dict: { input => reshaped_image})
|
19
|
+
|
20
|
+
puts "image is a #{result.first}"
|
21
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "tensor_stream"
|
5
|
+
require 'mnist-learn'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
|
9
|
+
|
10
|
+
ts = TensorStream
|
11
|
+
test_data = mnist.test.images
|
12
|
+
FileUtils.mkdir_p 'test_images'
|
13
|
+
|
14
|
+
sess = ts.session
|
15
|
+
|
16
|
+
test_data.each_with_index do |image , index|
|
17
|
+
image = 255.t - ts.cast(ts.reshape(image, [28, 28, 1]), :uint8) # reshape image
|
18
|
+
encoder = ts.image.encode_png(image)
|
19
|
+
blob = sess.run(encoder)
|
20
|
+
File.write(File.join('test_images', "#{index}_image.png"), blob)
|
21
|
+
end
|
data/samples/mnist_data_2.3.rb
CHANGED
@@ -54,18 +54,10 @@ b5 = tf.variable(tf.zeros([10]))
|
|
54
54
|
x_ = tf.reshape(x, [-1, 784])
|
55
55
|
|
56
56
|
y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
|
63
|
-
y3d = tf.nn.dropout(y3, pkeep)
|
64
|
-
|
65
|
-
y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
|
66
|
-
y4d = tf.nn.dropout(y4, pkeep)
|
67
|
-
|
68
|
-
ylogits = tf.matmul(y4d, w5) + b5
|
57
|
+
y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
|
58
|
+
y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
|
59
|
+
y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
|
60
|
+
ylogits = tf.matmul(y4, w5) + b5
|
69
61
|
|
70
62
|
# model
|
71
63
|
y = tf.nn.softmax(ylogits)
|
data/samples/mnist_data_3.0.rb
CHANGED
@@ -10,6 +10,7 @@ require "bundler/setup"
|
|
10
10
|
require 'tensor_stream'
|
11
11
|
require 'mnist-learn'
|
12
12
|
require 'pry-byebug'
|
13
|
+
require 'csv'
|
13
14
|
|
14
15
|
# Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
|
15
16
|
require 'tensor_stream/opencl'
|
@@ -21,6 +22,7 @@ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Ope
|
|
21
22
|
puts "downloading minst data"
|
22
23
|
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
|
23
24
|
mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
|
25
|
+
|
24
26
|
puts "downloading finished"
|
25
27
|
|
26
28
|
# neural network structure for this sample:
|
@@ -85,13 +87,10 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
|
|
85
87
|
yy = tf.reshape(y3, [-1, 7 * 7 * M])
|
86
88
|
y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
|
87
89
|
|
88
|
-
|
89
|
-
yy4 = tf.nn.dropout(y4, pkeep)
|
90
|
-
|
91
|
-
ylogits = tf.matmul(yy4, w5) + b5
|
90
|
+
ylogits = tf.matmul(y4, w5) + b5
|
92
91
|
|
93
92
|
# model
|
94
|
-
y = tf.nn.softmax(ylogits)
|
93
|
+
y = tf.nn.softmax(ylogits, name: 'out')
|
95
94
|
|
96
95
|
|
97
96
|
|
@@ -111,16 +110,21 @@ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
|
|
111
110
|
lr = 0.0001.t + tf.train.exponential_decay(0.003, step, 2000, 1/Math::E)
|
112
111
|
train_step = TensorStream::Train::AdamOptimizer.new(lr).minimize(cross_entropy)
|
113
112
|
|
114
|
-
sess = tf.session
|
113
|
+
sess = tf.session(profile_enabled: true)
|
115
114
|
# Add ops to save and restore all the variables.
|
116
115
|
|
117
116
|
init = tf.global_variables_initializer
|
118
117
|
|
119
118
|
sess.run(init)
|
119
|
+
|
120
|
+
#Setup save and restore
|
121
|
+
model_save_path = "test_models/mnist_data_3.0"
|
122
|
+
saver = tf::Train::Saver.new
|
123
|
+
saver.restore(sess, model_save_path)
|
124
|
+
|
120
125
|
mnist_train = mnist.train
|
121
126
|
test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
|
122
127
|
|
123
|
-
|
124
128
|
(0..10001).each do |i|
|
125
129
|
# load batch of images and correct answers
|
126
130
|
batch_x, batch_y = mnist_train.next_batch(100)
|
@@ -130,7 +134,8 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
|
|
130
134
|
sess.run(train_step, feed_dict: train_data)
|
131
135
|
|
132
136
|
if (i % 10 == 0)
|
133
|
-
#
|
137
|
+
# result = TensorStream::ReportTool.profile_for(sess)
|
138
|
+
# File.write("profile.csv", result.map(&:to_csv).join("\n"))
|
134
139
|
# success? add code to print it
|
135
140
|
a_train, c_train, l = sess.run([accuracy, cross_entropy, lr], feed_dict: { x => batch_x, y_ => batch_y, step => i, pkeep => 1.0})
|
136
141
|
puts "#{i}: accuracy:#{a_train} loss:#{c_train} (lr:#{l})"
|
@@ -140,6 +145,9 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
|
|
140
145
|
# success on test data?
|
141
146
|
a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data, pkeep => 1.0)
|
142
147
|
puts("#{i}: ******** test accuracy: #{a_test} test loss: #{c_test}")
|
148
|
+
|
149
|
+
# save current state of the model
|
150
|
+
save_path = saver.save(sess, model_save_path)
|
143
151
|
end
|
144
152
|
end
|
145
153
|
|
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency "awesome_print"
|
40
40
|
spec.add_development_dependency "mnist-learn"
|
41
41
|
spec.add_development_dependency "simplecov"
|
42
|
-
spec.add_dependency "tensor_stream", "
|
42
|
+
spec.add_dependency "tensor_stream", "1.0.0-rc1"
|
43
43
|
spec.add_dependency "opencl_ruby_ffi"
|
44
44
|
spec.add_dependency "oily_png"
|
45
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,16 +112,16 @@ dependencies:
|
|
112
112
|
name: tensor_stream
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - '='
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.
|
117
|
+
version: 1.0.0.pre.rc1
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - '='
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0.
|
124
|
+
version: 1.0.0.pre.rc1
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: opencl_ruby_ffi
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -246,6 +246,9 @@ files:
|
|
246
246
|
- lib/tensor_stream/opencl/opencl_evaluator.rb
|
247
247
|
- lib/tensor_stream/opencl/opencl_template_helper.rb
|
248
248
|
- lib/tensor_stream/opencl/version.rb
|
249
|
+
- samples/classify.rb
|
250
|
+
- samples/dump_mnist.rb
|
251
|
+
- samples/image_sort.rb
|
249
252
|
- samples/iris.data
|
250
253
|
- samples/iris.rb
|
251
254
|
- samples/logistic_regression.rb
|