tensor_stream-opencl 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e4aa123289372c651cd4da3e7c206abc4f9f67a551d4062180c5cf6555dc243
4
- data.tar.gz: 6517954207c85f56cd08b2892b0119d4bb7a35e2d4bd9b9cacc5d3c9ccfb9e42
3
+ metadata.gz: c0e8de1676b30c21f9529cdce9d3fee406cdf2945d54f890ae49c14c1329860e
4
+ data.tar.gz: 66932db63589eedcd6247083a27344bed8c80860b6d096d01fb5b46db1b53521
5
5
  SHA512:
6
- metadata.gz: 7f61d61be79dd1e06ebfdc77ed2dff9e717e0cdb292160fe20c9ca08693d867e1b0e0350c71db5d24feb4671a26e793f44d6b80762c384193c1985b6b1616376
7
- data.tar.gz: 72c32530717fac8ff947ce4b204535755134bde14e0f70d0d120ff101b5654843312186317cb480fd5e1c620a25328a3590b1f35193faf1d196e7ad631d169b0
6
+ metadata.gz: 5b905243d98976c94cb58dd443fc25f0b1e78bba689f20677ee3457dc44641d228642eb65adc4e58227d1feb0686fe29a3cabd8b25910f8e9c1aa0ef575ae8ff
7
+ data.tar.gz: 03ad5c5cd27ff058df206de8e109699fe0ef492e8ced1b97217cc0fe1ab7a4e43da5db45fed85b2e96b7bb35dc14465e39695b7995b3a1ccfcece7c050e0cae3
data/.gitignore CHANGED
@@ -1,6 +1,7 @@
1
1
  /.bundle/
2
2
  /.yardoc
3
3
  /_yardoc/
4
+ /test_images/
4
5
  /coverage/
5
6
  /doc/
6
7
  /pkg/
@@ -10,6 +11,8 @@ Gemfile.lock
10
11
  *.gem
11
12
  *.ckpt
12
13
  profile.json
14
+ profile.csv
15
+ /test_models/
13
16
 
14
17
  # rspec failure tracking
15
18
  .rspec_status
data/.rubocop.yml CHANGED
@@ -34,6 +34,7 @@ Metrics/CyclomaticComplexity:
34
34
  Metrics/BlockLength:
35
35
  Exclude:
36
36
  - lib/tensor_stream/math_gradients.rb
37
+ - benchmark/benchmark.rb
37
38
 
38
39
  Naming/AccessorMethodName:
39
40
  Exclude:
@@ -86,4 +87,8 @@ Style/TrailingCommaInHashLiteral:
86
87
  Naming/UncommunicativeMethodParamName:
87
88
  Exclude:
88
89
  - lib/tensor_stream/evaluator/ruby_evaluator.rb
89
- - lib/tensor_stream/ops.rb
90
+ - lib/tensor_stream/ops.rb
91
+
92
+ Style/BlockDelimiters:
93
+ Exclude:
94
+ - benchmark/benchmark.rb
@@ -103,26 +103,27 @@ module TensorStream
103
103
  end
104
104
  else
105
105
  raise TensorStream::ValueError, "#{num_split} does not divide #{value_shape[axis]} evenly" if num_split.reduce(:+) != value_shape[axis]
106
+
106
107
  # compute shapes of individual output buffers
107
108
  new_shapes = num_split.each_with_index.collect do |num, index|
108
109
  new_shape = value_shape.dup
109
110
  new_shape[axis] = num
110
111
  new_shape
111
112
  end
113
+ out = []
114
+
112
115
  if axis.zero? # axis zero fast copy path
113
116
  start = 0
114
- out = []
115
- new_shapes.each_with_index do |new_shape, index|
116
- element_count = new_shape.reduce(:*) || 1
117
+
118
+ new_shapes.each_with_index do |ns, index|
119
+ element_count = ns.reduce(:*) || 1
117
120
  region_size_in_bytes = element_count * value.buffer.element_size
118
- out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
121
+ out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{ns.join('.')}")
119
122
  start += region_size_in_bytes
120
123
  end
121
- out
122
124
  else
123
125
  # create buffers for each piece
124
126
  work_buffer = _create_result_buffer(tensor.data_type, value_shape, "#{tensor.name}/out")
125
- out = []
126
127
  start = 0
127
128
 
128
129
  steps = num_split.dup.reverse.drop(1).inject([0]) do |a, s|
@@ -157,14 +158,15 @@ module TensorStream
157
158
  event_wait_list: event_wait_list)
158
159
  end
159
160
  work_buffer.op = events
160
- new_shapes.each_with_index do |new_shape, index|
161
- element_count = new_shape.reduce(:*) || 1
161
+ new_shapes.each_with_index do |ns, index|
162
+ element_count = ns.reduce(:*) || 1
162
163
  region_size_in_bytes = element_count * work_buffer.buffer.element_size
163
- out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
164
+ out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
164
165
  start += region_size_in_bytes
165
166
  end
166
- out
167
167
  end
168
+
169
+ out
168
170
  end
169
171
 
170
172
  TensorStream::Evaluator::OutputGroup.new(outputs, outputs.map(&:data_type))
@@ -195,58 +197,57 @@ module TensorStream
195
197
 
196
198
  output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
197
199
  ops = if axis.zero? # fast path
198
- inputs.each_with_index.map do |input, index|
199
- next if input.empty_value?
200
-
201
- start = index * input.buffer.size * input.buffer.element_size
202
- region = [input.buffer.size * input.buffer.element_size, 1, 1]
203
- event_wait_list = build_event_wait_list(input)
204
- _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
205
- region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
206
- end.compact
207
- else
208
- elem_size = shape.empty? ? 1 : shape.reduce(:*)
209
- cl_n = OpenCL::Int1.new(elem_size)
200
+ inputs.each_with_index.map do |input, index|
201
+ next if input.empty_value?
210
202
 
211
- steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
212
- a << shape[axis] + a.last
213
- end
203
+ start = index * input.buffer.size * input.buffer.element_size
204
+ region = [input.buffer.size * input.buffer.element_size, 1, 1]
205
+ event_wait_list = build_event_wait_list(input)
206
+ _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
207
+ region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
208
+ end.compact
209
+ else
210
+ elem_size = shape.empty? ? 1 : shape.reduce(:*)
211
+ cl_n = OpenCL::Int1.new(elem_size)
214
212
 
215
- work_group = [elem_size]
216
- event_wait_list = build_event_wait_list(inputs)
213
+ steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
214
+ a << shape[axis] + a.last
215
+ end
216
+
217
+ work_group = [elem_size]
218
+ event_wait_list = build_event_wait_list(inputs)
219
+
220
+ inputs.each_with_index.map do |input, index|
221
+ cl_index = OpenCL::Int1.new(index)
222
+ step = OpenCL::Int1.new(steps[index])
223
+ _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
224
+ concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
225
+ output_buffer.cl_buffer, event_wait_list: event_wait_list)
226
+ end
227
+ end
217
228
 
218
- inputs.each_with_index.map do |input, index|
219
- cl_index = OpenCL::Int1.new(index)
220
- step = OpenCL::Int1.new(steps[index])
221
- _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
222
- concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
223
- output_buffer.cl_buffer, event_wait_list: event_wait_list)
224
- end
225
- end
226
229
  output_buffer.op = ops
227
230
  output_buffer
228
231
  end
229
232
 
230
- register_op :squeeze do |context, tensor, inputs|
233
+ register_op :squeeze do |_context, tensor, inputs|
231
234
  arr = inputs[0]
232
235
  shape = inputs[0].shape.dup
233
236
  axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
234
237
  if !axis.empty?
235
- axis.each do |axis|
236
- if shape[axis] == 1
237
- shape[axis] = nil
238
- else
239
- raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
240
- end
238
+ axis.each do |x|
239
+ raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1" unless shape[x] == 1
240
+
241
+ shape[x] = nil
241
242
  end
242
243
  else
243
244
  shape = shape.map { |s| s == 1 ? nil : s }
244
245
  end
245
246
 
246
247
  OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
247
- shape: shape.compact, buffer: arr.buffer,
248
- cl_buffer: arr.cl_buffer,
249
- op: arr.op)
248
+ shape: shape.compact, buffer: arr.buffer,
249
+ cl_buffer: arr.cl_buffer,
250
+ op: arr.op)
250
251
  end
251
252
 
252
253
  register_op :stack do |_context, tensor, inputs|
@@ -312,7 +313,6 @@ module TensorStream
312
313
  a << s * a.last
313
314
  end.reverse
314
315
 
315
- step = multipliers[0]
316
316
  sub_shape = new_shape.dup
317
317
  sub_shape.shift
318
318
 
@@ -375,9 +375,9 @@ module TensorStream
375
375
  end
376
376
 
377
377
  OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
378
- shape: shape, buffer: arr.buffer,
379
- cl_buffer: arr.cl_buffer,
380
- op: arr.op)
378
+ shape: shape, buffer: arr.buffer,
379
+ cl_buffer: arr.cl_buffer,
380
+ op: arr.op)
381
381
  end
382
382
 
383
383
  register_op :transpose, buffer: true do |_context, tensor, inputs|
@@ -407,7 +407,10 @@ module TensorStream
407
407
 
408
408
  shape = input_a.shape
409
409
 
410
- slice_param = input_b.zip(size).collect.with_index { | p, index| p[1] = (p[1] == -1) ? shape[index] : p[1] ; p[0]..p[0] + p[1] - 1 }.reverse
410
+ slice_param = input_b.zip(size).collect.with_index do |p, index|
411
+ p[1] = p[1] == -1 ? shape[index] : p[1]
412
+ p[0]..p[0] + p[1] - 1
413
+ end.reverse
411
414
 
412
415
  new_buf = input_a.buffer.reshape(*input_a.shape.reverse)
413
416
  sliced = new_buf.slice[*slice_param]
@@ -423,11 +426,11 @@ module TensorStream
423
426
  if a.data_type != tensor.data_type
424
427
  buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
425
428
  work_group = if inputs[0].shape.size > 2
426
- [ inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
427
- else
428
- m, n = inputs[0].shape
429
- [m || 1, n || 1]
430
- end
429
+ [inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
430
+ else
431
+ m, n = inputs[0].shape
432
+ [m || 1, n || 1]
433
+ end
431
434
 
432
435
  cl_m = OpenCL::Int1.new(work_group[0])
433
436
  cl_n = OpenCL::Int1.new(work_group[1])
@@ -11,12 +11,12 @@ module TensorStream
11
11
 
12
12
  assign = tensor.inputs[0] || tensor
13
13
 
14
- assign.buffer.dirty = true # force buffer copy when variable is read externally
15
- output_buffer = assign.buffer
14
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
15
+ output_buffer = assign.container_buffer
16
16
 
17
17
  work_group = [output_buffer.total_elements]
18
18
 
19
- event_wait_list = build_event_wait_list([assign.buffer, learning_rate, delta])
19
+ event_wait_list = build_event_wait_list([assign.container_buffer, learning_rate, delta])
20
20
 
21
21
  event = call_program("apply_gradient", output_buffer.data_type,
22
22
  work_group,
@@ -33,21 +33,21 @@ module TensorStream
33
33
 
34
34
  assign = tensor.inputs[0] || tensor
35
35
  assign_acc = tensor.inputs[1]
36
- assign.buffer.dirty = true # force buffer copy when variable is read externally
37
- assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
36
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
37
+ assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
38
38
 
39
- output_buffer = assign.buffer
39
+ output_buffer = assign.container_buffer
40
40
 
41
41
  work_group = [output_buffer.total_elements]
42
42
 
43
- event_wait_list = build_event_wait_list([assign.buffer, assign_acc.buffer, learning_rate, grad, momentum])
43
+ event_wait_list = build_event_wait_list([assign.container_buffer, assign_acc.container_buffer, learning_rate, grad, momentum])
44
44
  method_call = :"apply_momentum_#{output_buffer.data_type}"
45
45
  event = _cl_program("apply_momentum", nesterov: tensor.options[:use_nesterov], dtype: output_buffer.data_type).
46
46
  send(method_call, _opencl_queue, work_group, grad.cl_buffer,
47
47
  learning_rate.cl_buffer, momentum.cl_buffer, output_buffer.cl_buffer,
48
- assign_acc.buffer.cl_buffer, event_wait_list: event_wait_list)
48
+ assign_acc.container_buffer.cl_buffer, event_wait_list: event_wait_list)
49
49
  output_buffer.op = event
50
- assign_acc.buffer.op = event
50
+ assign_acc.container_buffer.op = event
51
51
  output_buffer
52
52
  end
53
53
 
@@ -58,11 +58,11 @@ module TensorStream
58
58
  assign_acc_update = tensor.inputs[2]
59
59
 
60
60
  # mark variable buffers as dirty
61
- assign.buffer.dirty = true # force buffer copy when variable is read externally
62
- assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
63
- assign_acc_update.buffer.dirty = true # force buffer copy when variable is read externally
61
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
62
+ assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
63
+ assign_acc_update.container_buffer.dirty = true # force buffer copy when variable is read externally
64
64
 
65
- output_buffer = assign.buffer
65
+ output_buffer = assign.container_buffer
66
66
 
67
67
  work_group = [output_buffer.total_elements]
68
68
 
@@ -73,13 +73,13 @@ module TensorStream
73
73
  rho.cl_buffer,
74
74
  epsilon.cl_buffer,
75
75
  grad.cl_buffer,
76
- assign.buffer.cl_buffer,
77
- assign_acc.buffer.cl_buffer,
78
- assign_acc_update.buffer.cl_buffer,
76
+ assign.container_buffer.cl_buffer,
77
+ assign_acc.container_buffer.cl_buffer,
78
+ assign_acc_update.container_buffer.cl_buffer,
79
79
  event_wait_list: event_wait_list)
80
80
  output_buffer.op = event
81
- assign_acc.buffer.op = event
82
- assign_acc_update.buffer.op = event
81
+ assign_acc.container_buffer.op = event
82
+ assign_acc_update.container_buffer.op = event
83
83
  output_buffer
84
84
  end
85
85
 
@@ -92,11 +92,11 @@ module TensorStream
92
92
  assign_v = tensor.inputs[2]
93
93
 
94
94
  # mark variable buffers as dirty
95
- assign.buffer.dirty = true # force buffer copy when variable is read externally
96
- assign_m.buffer.dirty = true # force buffer copy when variable is read externally
97
- assign_v.buffer.dirty = true # force buffer copy when variable is read externally
95
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
96
+ assign_m.container_buffer.dirty = true # force buffer copy when variable is read externally
97
+ assign_v.container_buffer.dirty = true # force buffer copy when variable is read externally
98
98
 
99
- output_buffer = assign.buffer
99
+ output_buffer = assign.container_buffer
100
100
 
101
101
  work_group = [output_buffer.total_elements]
102
102
 
@@ -110,13 +110,13 @@ module TensorStream
110
110
  beta1_t.cl_buffer,
111
111
  beta2_t.cl_buffer,
112
112
  epsilon_t.cl_buffer,
113
- assign_m.buffer.cl_buffer,
114
- assign.buffer.cl_buffer,
115
- assign_v.buffer.cl_buffer,
113
+ assign_m.container_buffer.cl_buffer,
114
+ assign.container_buffer.cl_buffer,
115
+ assign_v.container_buffer.cl_buffer,
116
116
  event_wait_list: event_wait_list)
117
117
  output_buffer.op = event
118
- assign_m.buffer.op = event
119
- assign_v.buffer.op = event
118
+ assign_m.container_buffer.op = event
119
+ assign_v.container_buffer.op = event
120
120
  output_buffer
121
121
  end
122
122
 
@@ -126,9 +126,9 @@ module TensorStream
126
126
  assign = tensor.inputs[0] || tensor
127
127
  assign_acc = tensor.inputs[1]
128
128
 
129
- assign.buffer.dirty = true
130
- assign_acc.buffer.dirty = true
131
- output_buffer = assign.buffer
129
+ assign.container_buffer.dirty = true
130
+ assign_acc.container_buffer.dirty = true
131
+ output_buffer = assign.container_buffer
132
132
 
133
133
  work_group = [output_buffer.total_elements]
134
134
 
@@ -138,11 +138,11 @@ module TensorStream
138
138
  work_group,
139
139
  lr.cl_buffer,
140
140
  grad.cl_buffer,
141
- assign.buffer.cl_buffer,
142
- assign_acc.buffer.cl_buffer,
141
+ assign.container_buffer.cl_buffer,
142
+ assign_acc.container_buffer.cl_buffer,
143
143
  event_wait_list: event_wait_list)
144
144
  output_buffer.op = event
145
- assign_acc.buffer.op = event
145
+ assign_acc.container_buffer.op = event
146
146
  output_buffer
147
147
  end
148
148
 
@@ -154,11 +154,11 @@ module TensorStream
154
154
  assign_ms = tensor.inputs[2]
155
155
  assign_mom = tensor.inputs[3]
156
156
 
157
- assign.buffer.dirty = true
158
- assign_mg.buffer.dirty = true
159
- assign_ms.buffer.dirty = true
160
- assign_mom.buffer.dirty = true
161
- output_buffer = assign.buffer
157
+ assign.container_buffer.dirty = true
158
+ assign_mg.container_buffer.dirty = true
159
+ assign_ms.container_buffer.dirty = true
160
+ assign_mom.container_buffer.dirty = true
161
+ output_buffer = assign.container_buffer
162
162
  event_wait_list = build_event_wait_list(inputs)
163
163
  work_group = [output_buffer.total_elements]
164
164
 
@@ -168,30 +168,30 @@ module TensorStream
168
168
  momentum.cl_buffer,
169
169
  epsilon.cl_buffer,
170
170
  grad.cl_buffer,
171
- assign.buffer.cl_buffer,
172
- assign_ms.buffer.cl_buffer,
173
- assign_mg.buffer.cl_buffer,
174
- assign_mom.buffer.cl_buffer,
171
+ assign.container_buffer.cl_buffer,
172
+ assign_ms.container_buffer.cl_buffer,
173
+ assign_mg.container_buffer.cl_buffer,
174
+ assign_mom.container_buffer.cl_buffer,
175
175
  event_wait_list: event_wait_list)
176
176
 
177
177
  output_buffer.op = event
178
- assign_mg.buffer.op = event
179
- assign_ms.buffer.op = event
180
- assign_mom.buffer.op = event
178
+ assign_mg.container_buffer.op = event
179
+ assign_ms.container_buffer.op = event
180
+ assign_mom.container_buffer.op = event
181
181
  output_buffer
182
182
  end
183
183
 
184
- register_op :apply_rms_prop do |context, tensor, inputs|
184
+ register_op :apply_rms_prop do |_context, tensor, inputs|
185
185
  var, ms, mom, lr, rho, momentum, epsilon, grad = inputs
186
186
 
187
187
  assign = tensor.inputs[0]
188
188
  assign_ms = tensor.inputs[1]
189
189
  assign_mom = tensor.inputs[2]
190
190
 
191
- assign.buffer.dirty = true
192
- assign_ms.buffer.dirty = true
193
- assign_mom.buffer.dirty = true
194
- output_buffer = assign.buffer
191
+ assign.container_buffer.dirty = true
192
+ assign_ms.container_buffer.dirty = true
193
+ assign_mom.container_buffer.dirty = true
194
+ output_buffer = assign.container_buffer
195
195
  event_wait_list = build_event_wait_list(inputs)
196
196
  work_group = [output_buffer.total_elements]
197
197
 
@@ -202,14 +202,14 @@ module TensorStream
202
202
  momentum.cl_buffer,
203
203
  epsilon.cl_buffer,
204
204
  grad.cl_buffer,
205
- assign.buffer.cl_buffer,
206
- assign_ms.buffer.cl_buffer,
207
- assign_mom.buffer.cl_buffer,
205
+ assign.container_buffer.cl_buffer,
206
+ assign_ms.container_buffer.cl_buffer,
207
+ assign_mom.container_buffer.cl_buffer,
208
208
  event_wait_list: event_wait_list)
209
209
 
210
210
  output_buffer.op = event
211
- assign_ms.buffer.op = event
212
- assign_mom.buffer.op = event
211
+ assign_ms.container_buffer.op = event
212
+ assign_mom.container_buffer.op = event
213
213
  output_buffer
214
214
  end
215
215
 
@@ -273,7 +273,7 @@ module TensorStream
273
273
  output_buffer_backprop.op = event
274
274
 
275
275
  loss = reduction(context, tensor, output_buffer, rank, :sum)
276
- TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
276
+ TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
277
277
  end
278
278
 
279
279
  register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
@@ -370,6 +370,7 @@ module TensorStream
370
370
  raise TensorStream::ValueError, " Current implementation does not yet support strides in the batch and depth dimensions." if strides[0] != 1 || strides[3] != 1
371
371
 
372
372
  padding_option = tensor.options[:padding]
373
+
373
374
  padding = conv2d_padding_options(padding_option, filter_shape, height, width, height_stride, width_stride)
374
375
  event_wait_list = build_event_wait_list(inputs)
375
376
 
@@ -33,13 +33,18 @@ module TensorStream
33
33
  end
34
34
 
35
35
  if shape.empty?
36
- return buffer.to_s if data_type == :string
37
- return buffer[0] != 0 if data_type == :boolean
38
- return buffer[0]
36
+ return case data_type
37
+ when :string
38
+ buffer.to_s
39
+ when :boolean
40
+ buffer[0] != 0
41
+ else
42
+ buffer[0]
43
+ end
39
44
  end
40
-
41
- result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
42
- data_type == :boolean ? process_function_op(result, ->(a, _b) { a != 0 }) : result
45
+
46
+ result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
47
+ data_type == :boolean ? process_function_op(result) { |a, _b| a != 0 } : result
43
48
  end
44
49
 
45
50
  def self.nil_buffer(owner, name, data_type)
@@ -225,7 +225,6 @@ module TensorStream
225
225
  def prepare_input(tensor, context, options = {})
226
226
  return nil unless tensor
227
227
 
228
- tensor = resolve_placeholder(tensor)
229
228
  if options[:noop]
230
229
  tensor
231
230
  elsif options[:buffer]
@@ -329,30 +328,18 @@ module TensorStream
329
328
  tensor = tensor.call if tensor.is_a?(Proc)
330
329
 
331
330
  child_context = execution_context.dup
332
- res = if tensor.is_a?(Operation)
333
- if !on_same_device?(tensor) # tensor is on another device or evaluator
334
- perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
335
- else
336
- eval_operation(tensor, child_context)
337
- end
338
- elsif tensor.is_a?(Variable)
339
- eval_variable(tensor, child_context)
340
- elsif tensor.is_a?(Placeholder)
341
- resolve_placeholder(tensor, child_context)
331
+ res = if !on_same_device?(tensor) # tensor is on another device or evaluator
332
+ perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
333
+ elsif tensor.is_a?(Operation)
334
+ eval_operation(tensor, child_context)
342
335
  else
343
- eval_tensor(tensor, child_context)
336
+ raise "invalid tensor type!"
344
337
  end
338
+
345
339
  execution_context.deep_merge!(returns: child_context[:returns])
346
340
  res
347
341
  end
348
342
 
349
- def eval_variable(tensor, _child_context)
350
- raise "variable #{tensor.name} not initalized" if tensor.value.nil? && (tensor.buffer.nil? || !tensor.buffer.dirty)
351
-
352
- tensor.buffer = wrap_opencl(tensor, name: tensor.name) if tensor.buffer.nil?
353
- tensor.buffer
354
- end
355
-
356
343
  register_op :no_op do |_context, _tensor, _inputs|
357
344
  end
358
345
 
@@ -396,14 +383,14 @@ module TensorStream
396
383
  end
397
384
 
398
385
  %i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
399
- register_op op do |context, tensor, inputs|
386
+ register_op op do |_context, tensor, inputs|
400
387
  execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], 'cond')
401
388
  end
402
389
  end
403
390
 
404
391
  register_op :where, noop: true do |context, tensor, inputs|
405
- pred = tensor.options[:pred]
406
- execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
392
+ pred = inputs[0]
393
+ execute_cond_func('where', tensor, pred, inputs[1], inputs[2], context)
407
394
  end
408
395
 
409
396
  register_op :check_numerics, noop: true do |context, tensor, inputs|
@@ -455,10 +442,36 @@ module TensorStream
455
442
  nil
456
443
  end
457
444
 
445
+ register_op :const do |_context, tensor, inputs|
446
+ wrap_opencl(tensor.const_value, name: tensor.name, data_type: tensor.data_type)
447
+ end
448
+
458
449
  register_op :size do |_context, tensor, inputs|
459
450
  wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
460
451
  end
461
452
 
453
+ register_op :restore_ts do |context, tensor, inputs|
454
+ inputs = inputs.dup
455
+ filename = inputs.shift
456
+ tensor_names = inputs
457
+
458
+ filename = read_final_result(complete_eval(filename, context))
459
+ tensor_names.map! { |n| read_final_result(complete_eval(n, context)) }
460
+
461
+ input_dump = YAML.safe_load(File.read(filename), [Symbol])
462
+ vars = tensor.graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
463
+
464
+ vars.select! { |v| input_dump['variables'].key?(v.name) && tensor_names.include?(v.name) }
465
+ vars.each do |variable|
466
+ data = TensorStream::Packer.unpack(Zlib::Inflate.inflate(Base64.decode64(input_dump['variables'][variable.name]['data'])), variable.data_type)
467
+ shape = input_dump['variables'][variable.name]['shape']
468
+ variable.buffer = convert_to_opencl(data, shape, data_type: variable.data_type, name: variable.name)
469
+ variable.value = TensorShape.reshape(data, shape)
470
+ end
471
+
472
+ nil
473
+ end
474
+
462
475
  def eval_operation(tensor, child_context)
463
476
  cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
464
477
  return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
@@ -514,7 +527,7 @@ module TensorStream
514
527
  # File.write('/home/jedld/workspace/tensor_stream/samples/error.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
515
528
 
516
529
  # File.write('/Users/josephemmanueldayo/workspace/gradients.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
517
- raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : #{tensor.to_math(true, 1)} defined at #{tensor.source}"
530
+ raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : defined at #{tensor.source}"
518
531
  end
519
532
 
520
533
  def eval_tensor(tensor, child_context)
@@ -539,21 +552,21 @@ module TensorStream
539
552
  assign = tensor.inputs[0] || tensor
540
553
  buffer = complete_eval(b, child_context)
541
554
 
542
- if assign.buffer
543
- event_wait_list = build_event_wait_list([buffer, assign.buffer])
544
- assign.buffer.op = if assign.buffer.cl_buffer != buffer.cl_buffer
545
- _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: event_wait_list)
546
- else
547
- buffer.op
548
- end
555
+ if assign.container_buffer
556
+ event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
557
+ assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
558
+ _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
559
+ else
560
+ buffer.op
561
+ end
549
562
  else
550
563
  value = read_final_result(buffer)
551
- assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
552
- assign.value = value
564
+ assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
565
+ assign.options[:container].value = value
553
566
  end
554
567
 
555
- assign.buffer.dirty = true
556
- assign.buffer
568
+ assign.container_buffer.dirty = true
569
+ assign.container_buffer
557
570
  end
558
571
 
559
572
  def execute_2_operand_func(op_name, tensor, a, b, prog_name = nil)
@@ -572,7 +585,7 @@ module TensorStream
572
585
  [m || 1, n || 1]
573
586
  elsif (b.shape.size == 1) && (result_shape.last == b.shape.last)
574
587
  last_dim = b.shape.last
575
- [result_shape.reduce(:*) / last_dim, last_dim]
588
+ [result_shape.reduce(:*) / last_dim, last_dim]
576
589
  else
577
590
  raise "rank > 2 not supported for now"
578
591
  end
@@ -622,7 +635,7 @@ module TensorStream
622
635
  work_group = if p.shape.size > 2
623
636
  [m, p.shape.reduce(:*) / m]
624
637
  else
625
- [ m || 1, n || 1]
638
+ [m || 1, n || 1]
626
639
  end
627
640
 
628
641
  cl_m = OpenCL::Int1.new(work_group[0])
@@ -1,5 +1,5 @@
1
1
  module TensorStream
2
2
  module Opencl
3
- VERSION = "0.2.4"
3
+ VERSION = "0.2.5"
4
4
  end
5
5
  end
@@ -1,4 +1,5 @@
1
1
  require "tensor_stream/opencl/version"
2
+ require 'tensor_stream'
2
3
  require "tensor_stream/opencl/opencl_evaluator"
3
4
 
4
5
  module TensorStream
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "tensor_stream"
5
+ require 'mnist-learn'
6
+ require 'fileutils'
7
+
8
+ file_path = ARGV[0]
9
+ model_path = ARGV[1]
10
+
11
+ decoded_image = TensorStream.image.decode_png(File.read(file_path), channels: 1)
12
+ target_graph = TensorStream::YamlLoader.new.load_from_file(model_path)
13
+ input = target_graph['Placeholder']
14
+ output = TensorStream.argmax(target_graph['out'], 1)
15
+ sess = TensorStream.session
16
+
17
+ reshaped_image = 255.0.t - decoded_image.reshape([1, 28, 28, 1]).cast(:float32)
18
+ result = sess.run(output, feed_dict: { input => reshaped_image})
19
+
20
+ puts "image is a #{result.first}"
21
+
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "tensor_stream"
5
+ require 'mnist-learn'
6
+ require 'fileutils'
7
+
8
+ mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
9
+
10
+ ts = TensorStream
11
+ test_data = mnist.test.images
12
+ FileUtils.mkdir_p 'test_images'
13
+
14
+ sess = ts.session
15
+
16
+ test_data.each_with_index do |image , index|
17
+ image = 255.t - ts.cast(ts.reshape(image, [28, 28, 1]), :uint8) # reshape image
18
+ encoder = ts.image.encode_png(image)
19
+ blob = sess.run(encoder)
20
+ File.write(File.join('test_images', "#{index}_image.png"), blob)
21
+ end
@@ -0,0 +1,9 @@
1
+ require "bundler/setup"
2
+ require 'tensor_stream'
3
+ require 'mnist-learn'
4
+ require 'csv'
5
+
6
+ # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
7
+ require 'tensor_stream/opencl'
8
+
9
+
@@ -54,18 +54,10 @@ b5 = tf.variable(tf.zeros([10]))
54
54
  x_ = tf.reshape(x, [-1, 784])
55
55
 
56
56
  y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
57
- y1d = tf.nn.dropout(y1, pkeep)
58
-
59
- y2 = tf.nn.relu(tf.matmul(y1d, w2) + b2)
60
- y2d = tf.nn.dropout(y2, pkeep)
61
-
62
- y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
63
- y3d = tf.nn.dropout(y3, pkeep)
64
-
65
- y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
66
- y4d = tf.nn.dropout(y4, pkeep)
67
-
68
- ylogits = tf.matmul(y4d, w5) + b5
57
+ y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
58
+ y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
59
+ y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
60
+ ylogits = tf.matmul(y4, w5) + b5
69
61
 
70
62
  # model
71
63
  y = tf.nn.softmax(ylogits)
@@ -10,6 +10,7 @@ require "bundler/setup"
10
10
  require 'tensor_stream'
11
11
  require 'mnist-learn'
12
12
  require 'pry-byebug'
13
+ require 'csv'
13
14
 
14
15
  # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
15
16
  require 'tensor_stream/opencl'
@@ -21,6 +22,7 @@ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Ope
21
22
  puts "downloading minst data"
22
23
  # Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
23
24
  mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
25
+
24
26
  puts "downloading finished"
25
27
 
26
28
  # neural network structure for this sample:
@@ -85,13 +87,10 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
85
87
  yy = tf.reshape(y3, [-1, 7 * 7 * M])
86
88
  y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
87
89
 
88
- # dropout to prevent overfitting
89
- yy4 = tf.nn.dropout(y4, pkeep)
90
-
91
- ylogits = tf.matmul(yy4, w5) + b5
90
+ ylogits = tf.matmul(y4, w5) + b5
92
91
 
93
92
  # model
94
- y = tf.nn.softmax(ylogits)
93
+ y = tf.nn.softmax(ylogits, name: 'out')
95
94
 
96
95
 
97
96
 
@@ -111,16 +110,21 @@ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
111
110
  lr = 0.0001.t + tf.train.exponential_decay(0.003, step, 2000, 1/Math::E)
112
111
  train_step = TensorStream::Train::AdamOptimizer.new(lr).minimize(cross_entropy)
113
112
 
114
- sess = tf.session
113
+ sess = tf.session(profile_enabled: true)
115
114
  # Add ops to save and restore all the variables.
116
115
 
117
116
  init = tf.global_variables_initializer
118
117
 
119
118
  sess.run(init)
119
+
120
+ #Setup save and restore
121
+ model_save_path = "test_models/mnist_data_3.0"
122
+ saver = tf::Train::Saver.new
123
+ saver.restore(sess, model_save_path)
124
+
120
125
  mnist_train = mnist.train
121
126
  test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
122
127
 
123
-
124
128
  (0..10001).each do |i|
125
129
  # load batch of images and correct answers
126
130
  batch_x, batch_y = mnist_train.next_batch(100)
@@ -130,7 +134,8 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
130
134
  sess.run(train_step, feed_dict: train_data)
131
135
 
132
136
  if (i % 10 == 0)
133
- # File.write("profile.json", TensorStream::ReportTool.profile_for(sess).to_json)
137
+ # result = TensorStream::ReportTool.profile_for(sess)
138
+ # File.write("profile.csv", result.map(&:to_csv).join("\n"))
134
139
  # success? add code to print it
135
140
  a_train, c_train, l = sess.run([accuracy, cross_entropy, lr], feed_dict: { x => batch_x, y_ => batch_y, step => i, pkeep => 1.0})
136
141
  puts "#{i}: accuracy:#{a_train} loss:#{c_train} (lr:#{l})"
@@ -140,6 +145,9 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
140
145
  # success on test data?
141
146
  a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data, pkeep => 1.0)
142
147
  puts("#{i}: ******** test accuracy: #{a_test} test loss: #{c_test}")
148
+
149
+ # save current state of the model
150
+ save_path = saver.save(sess, model_save_path)
143
151
  end
144
152
  end
145
153
 
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
39
39
  spec.add_development_dependency "awesome_print"
40
40
  spec.add_development_dependency "mnist-learn"
41
41
  spec.add_development_dependency "simplecov"
42
- spec.add_dependency "tensor_stream", "~> 0.9.8"
42
+ spec.add_dependency "tensor_stream", "1.0.0-rc1"
43
43
  spec.add_dependency "opencl_ruby_ffi"
44
44
  spec.add_dependency "oily_png"
45
45
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tensor_stream-opencl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Dayo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-11-25 00:00:00.000000000 Z
11
+ date: 2019-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,16 +112,16 @@ dependencies:
112
112
  name: tensor_stream
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - "~>"
115
+ - - '='
116
116
  - !ruby/object:Gem::Version
117
- version: 0.9.8
117
+ version: 1.0.0.pre.rc1
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - "~>"
122
+ - - '='
123
123
  - !ruby/object:Gem::Version
124
- version: 0.9.8
124
+ version: 1.0.0.pre.rc1
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: opencl_ruby_ffi
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -246,6 +246,9 @@ files:
246
246
  - lib/tensor_stream/opencl/opencl_evaluator.rb
247
247
  - lib/tensor_stream/opencl/opencl_template_helper.rb
248
248
  - lib/tensor_stream/opencl/version.rb
249
+ - samples/classify.rb
250
+ - samples/dump_mnist.rb
251
+ - samples/image_sort.rb
249
252
  - samples/iris.data
250
253
  - samples/iris.rb
251
254
  - samples/logistic_regression.rb