tensor_stream-opencl 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e4aa123289372c651cd4da3e7c206abc4f9f67a551d4062180c5cf6555dc243
4
- data.tar.gz: 6517954207c85f56cd08b2892b0119d4bb7a35e2d4bd9b9cacc5d3c9ccfb9e42
3
+ metadata.gz: c0e8de1676b30c21f9529cdce9d3fee406cdf2945d54f890ae49c14c1329860e
4
+ data.tar.gz: 66932db63589eedcd6247083a27344bed8c80860b6d096d01fb5b46db1b53521
5
5
  SHA512:
6
- metadata.gz: 7f61d61be79dd1e06ebfdc77ed2dff9e717e0cdb292160fe20c9ca08693d867e1b0e0350c71db5d24feb4671a26e793f44d6b80762c384193c1985b6b1616376
7
- data.tar.gz: 72c32530717fac8ff947ce4b204535755134bde14e0f70d0d120ff101b5654843312186317cb480fd5e1c620a25328a3590b1f35193faf1d196e7ad631d169b0
6
+ metadata.gz: 5b905243d98976c94cb58dd443fc25f0b1e78bba689f20677ee3457dc44641d228642eb65adc4e58227d1feb0686fe29a3cabd8b25910f8e9c1aa0ef575ae8ff
7
+ data.tar.gz: 03ad5c5cd27ff058df206de8e109699fe0ef492e8ced1b97217cc0fe1ab7a4e43da5db45fed85b2e96b7bb35dc14465e39695b7995b3a1ccfcece7c050e0cae3
data/.gitignore CHANGED
@@ -1,6 +1,7 @@
1
1
  /.bundle/
2
2
  /.yardoc
3
3
  /_yardoc/
4
+ /test_images/
4
5
  /coverage/
5
6
  /doc/
6
7
  /pkg/
@@ -10,6 +11,8 @@ Gemfile.lock
10
11
  *.gem
11
12
  *.ckpt
12
13
  profile.json
14
+ profile.csv
15
+ /test_models/
13
16
 
14
17
  # rspec failure tracking
15
18
  .rspec_status
data/.rubocop.yml CHANGED
@@ -34,6 +34,7 @@ Metrics/CyclomaticComplexity:
34
34
  Metrics/BlockLength:
35
35
  Exclude:
36
36
  - lib/tensor_stream/math_gradients.rb
37
+ - benchmark/benchmark.rb
37
38
 
38
39
  Naming/AccessorMethodName:
39
40
  Exclude:
@@ -86,4 +87,8 @@ Style/TrailingCommaInHashLiteral:
86
87
  Naming/UncommunicativeMethodParamName:
87
88
  Exclude:
88
89
  - lib/tensor_stream/evaluator/ruby_evaluator.rb
89
- - lib/tensor_stream/ops.rb
90
+ - lib/tensor_stream/ops.rb
91
+
92
+ Style/BlockDelimiters:
93
+ Exclude:
94
+ - benchmark/benchmark.rb
@@ -103,26 +103,27 @@ module TensorStream
103
103
  end
104
104
  else
105
105
  raise TensorStream::ValueError, "#{num_split} does not divide #{value_shape[axis]} evenly" if num_split.reduce(:+) != value_shape[axis]
106
+
106
107
  # compute shapes of individual output buffers
107
108
  new_shapes = num_split.each_with_index.collect do |num, index|
108
109
  new_shape = value_shape.dup
109
110
  new_shape[axis] = num
110
111
  new_shape
111
112
  end
113
+ out = []
114
+
112
115
  if axis.zero? # axis zero fast copy path
113
116
  start = 0
114
- out = []
115
- new_shapes.each_with_index do |new_shape, index|
116
- element_count = new_shape.reduce(:*) || 1
117
+
118
+ new_shapes.each_with_index do |ns, index|
119
+ element_count = ns.reduce(:*) || 1
117
120
  region_size_in_bytes = element_count * value.buffer.element_size
118
- out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
121
+ out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{ns.join('.')}")
119
122
  start += region_size_in_bytes
120
123
  end
121
- out
122
124
  else
123
125
  # create buffers for each piece
124
126
  work_buffer = _create_result_buffer(tensor.data_type, value_shape, "#{tensor.name}/out")
125
- out = []
126
127
  start = 0
127
128
 
128
129
  steps = num_split.dup.reverse.drop(1).inject([0]) do |a, s|
@@ -157,14 +158,15 @@ module TensorStream
157
158
  event_wait_list: event_wait_list)
158
159
  end
159
160
  work_buffer.op = events
160
- new_shapes.each_with_index do |new_shape, index|
161
- element_count = new_shape.reduce(:*) || 1
161
+ new_shapes.each_with_index do |ns, index|
162
+ element_count = ns.reduce(:*) || 1
162
163
  region_size_in_bytes = element_count * work_buffer.buffer.element_size
163
- out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
164
+ out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
164
165
  start += region_size_in_bytes
165
166
  end
166
- out
167
167
  end
168
+
169
+ out
168
170
  end
169
171
 
170
172
  TensorStream::Evaluator::OutputGroup.new(outputs, outputs.map(&:data_type))
@@ -195,58 +197,57 @@ module TensorStream
195
197
 
196
198
  output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
197
199
  ops = if axis.zero? # fast path
198
- inputs.each_with_index.map do |input, index|
199
- next if input.empty_value?
200
-
201
- start = index * input.buffer.size * input.buffer.element_size
202
- region = [input.buffer.size * input.buffer.element_size, 1, 1]
203
- event_wait_list = build_event_wait_list(input)
204
- _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
205
- region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
206
- end.compact
207
- else
208
- elem_size = shape.empty? ? 1 : shape.reduce(:*)
209
- cl_n = OpenCL::Int1.new(elem_size)
200
+ inputs.each_with_index.map do |input, index|
201
+ next if input.empty_value?
210
202
 
211
- steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
212
- a << shape[axis] + a.last
213
- end
203
+ start = index * input.buffer.size * input.buffer.element_size
204
+ region = [input.buffer.size * input.buffer.element_size, 1, 1]
205
+ event_wait_list = build_event_wait_list(input)
206
+ _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
207
+ region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
208
+ end.compact
209
+ else
210
+ elem_size = shape.empty? ? 1 : shape.reduce(:*)
211
+ cl_n = OpenCL::Int1.new(elem_size)
214
212
 
215
- work_group = [elem_size]
216
- event_wait_list = build_event_wait_list(inputs)
213
+ steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
214
+ a << shape[axis] + a.last
215
+ end
216
+
217
+ work_group = [elem_size]
218
+ event_wait_list = build_event_wait_list(inputs)
219
+
220
+ inputs.each_with_index.map do |input, index|
221
+ cl_index = OpenCL::Int1.new(index)
222
+ step = OpenCL::Int1.new(steps[index])
223
+ _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
224
+ concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
225
+ output_buffer.cl_buffer, event_wait_list: event_wait_list)
226
+ end
227
+ end
217
228
 
218
- inputs.each_with_index.map do |input, index|
219
- cl_index = OpenCL::Int1.new(index)
220
- step = OpenCL::Int1.new(steps[index])
221
- _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
222
- concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
223
- output_buffer.cl_buffer, event_wait_list: event_wait_list)
224
- end
225
- end
226
229
  output_buffer.op = ops
227
230
  output_buffer
228
231
  end
229
232
 
230
- register_op :squeeze do |context, tensor, inputs|
233
+ register_op :squeeze do |_context, tensor, inputs|
231
234
  arr = inputs[0]
232
235
  shape = inputs[0].shape.dup
233
236
  axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
234
237
  if !axis.empty?
235
- axis.each do |axis|
236
- if shape[axis] == 1
237
- shape[axis] = nil
238
- else
239
- raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
240
- end
238
+ axis.each do |x|
239
+ raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1" unless shape[x] == 1
240
+
241
+ shape[x] = nil
241
242
  end
242
243
  else
243
244
  shape = shape.map { |s| s == 1 ? nil : s }
244
245
  end
245
246
 
246
247
  OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
247
- shape: shape.compact, buffer: arr.buffer,
248
- cl_buffer: arr.cl_buffer,
249
- op: arr.op)
248
+ shape: shape.compact, buffer: arr.buffer,
249
+ cl_buffer: arr.cl_buffer,
250
+ op: arr.op)
250
251
  end
251
252
 
252
253
  register_op :stack do |_context, tensor, inputs|
@@ -312,7 +313,6 @@ module TensorStream
312
313
  a << s * a.last
313
314
  end.reverse
314
315
 
315
- step = multipliers[0]
316
316
  sub_shape = new_shape.dup
317
317
  sub_shape.shift
318
318
 
@@ -375,9 +375,9 @@ module TensorStream
375
375
  end
376
376
 
377
377
  OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
378
- shape: shape, buffer: arr.buffer,
379
- cl_buffer: arr.cl_buffer,
380
- op: arr.op)
378
+ shape: shape, buffer: arr.buffer,
379
+ cl_buffer: arr.cl_buffer,
380
+ op: arr.op)
381
381
  end
382
382
 
383
383
  register_op :transpose, buffer: true do |_context, tensor, inputs|
@@ -407,7 +407,10 @@ module TensorStream
407
407
 
408
408
  shape = input_a.shape
409
409
 
410
- slice_param = input_b.zip(size).collect.with_index { | p, index| p[1] = (p[1] == -1) ? shape[index] : p[1] ; p[0]..p[0] + p[1] - 1 }.reverse
410
+ slice_param = input_b.zip(size).collect.with_index do |p, index|
411
+ p[1] = p[1] == -1 ? shape[index] : p[1]
412
+ p[0]..p[0] + p[1] - 1
413
+ end.reverse
411
414
 
412
415
  new_buf = input_a.buffer.reshape(*input_a.shape.reverse)
413
416
  sliced = new_buf.slice[*slice_param]
@@ -423,11 +426,11 @@ module TensorStream
423
426
  if a.data_type != tensor.data_type
424
427
  buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
425
428
  work_group = if inputs[0].shape.size > 2
426
- [ inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
427
- else
428
- m, n = inputs[0].shape
429
- [m || 1, n || 1]
430
- end
429
+ [inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
430
+ else
431
+ m, n = inputs[0].shape
432
+ [m || 1, n || 1]
433
+ end
431
434
 
432
435
  cl_m = OpenCL::Int1.new(work_group[0])
433
436
  cl_n = OpenCL::Int1.new(work_group[1])
@@ -11,12 +11,12 @@ module TensorStream
11
11
 
12
12
  assign = tensor.inputs[0] || tensor
13
13
 
14
- assign.buffer.dirty = true # force buffer copy when variable is read externally
15
- output_buffer = assign.buffer
14
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
15
+ output_buffer = assign.container_buffer
16
16
 
17
17
  work_group = [output_buffer.total_elements]
18
18
 
19
- event_wait_list = build_event_wait_list([assign.buffer, learning_rate, delta])
19
+ event_wait_list = build_event_wait_list([assign.container_buffer, learning_rate, delta])
20
20
 
21
21
  event = call_program("apply_gradient", output_buffer.data_type,
22
22
  work_group,
@@ -33,21 +33,21 @@ module TensorStream
33
33
 
34
34
  assign = tensor.inputs[0] || tensor
35
35
  assign_acc = tensor.inputs[1]
36
- assign.buffer.dirty = true # force buffer copy when variable is read externally
37
- assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
36
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
37
+ assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
38
38
 
39
- output_buffer = assign.buffer
39
+ output_buffer = assign.container_buffer
40
40
 
41
41
  work_group = [output_buffer.total_elements]
42
42
 
43
- event_wait_list = build_event_wait_list([assign.buffer, assign_acc.buffer, learning_rate, grad, momentum])
43
+ event_wait_list = build_event_wait_list([assign.container_buffer, assign_acc.container_buffer, learning_rate, grad, momentum])
44
44
  method_call = :"apply_momentum_#{output_buffer.data_type}"
45
45
  event = _cl_program("apply_momentum", nesterov: tensor.options[:use_nesterov], dtype: output_buffer.data_type).
46
46
  send(method_call, _opencl_queue, work_group, grad.cl_buffer,
47
47
  learning_rate.cl_buffer, momentum.cl_buffer, output_buffer.cl_buffer,
48
- assign_acc.buffer.cl_buffer, event_wait_list: event_wait_list)
48
+ assign_acc.container_buffer.cl_buffer, event_wait_list: event_wait_list)
49
49
  output_buffer.op = event
50
- assign_acc.buffer.op = event
50
+ assign_acc.container_buffer.op = event
51
51
  output_buffer
52
52
  end
53
53
 
@@ -58,11 +58,11 @@ module TensorStream
58
58
  assign_acc_update = tensor.inputs[2]
59
59
 
60
60
  # mark variable buffers as dirty
61
- assign.buffer.dirty = true # force buffer copy when variable is read externally
62
- assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
63
- assign_acc_update.buffer.dirty = true # force buffer copy when variable is read externally
61
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
62
+ assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
63
+ assign_acc_update.container_buffer.dirty = true # force buffer copy when variable is read externally
64
64
 
65
- output_buffer = assign.buffer
65
+ output_buffer = assign.container_buffer
66
66
 
67
67
  work_group = [output_buffer.total_elements]
68
68
 
@@ -73,13 +73,13 @@ module TensorStream
73
73
  rho.cl_buffer,
74
74
  epsilon.cl_buffer,
75
75
  grad.cl_buffer,
76
- assign.buffer.cl_buffer,
77
- assign_acc.buffer.cl_buffer,
78
- assign_acc_update.buffer.cl_buffer,
76
+ assign.container_buffer.cl_buffer,
77
+ assign_acc.container_buffer.cl_buffer,
78
+ assign_acc_update.container_buffer.cl_buffer,
79
79
  event_wait_list: event_wait_list)
80
80
  output_buffer.op = event
81
- assign_acc.buffer.op = event
82
- assign_acc_update.buffer.op = event
81
+ assign_acc.container_buffer.op = event
82
+ assign_acc_update.container_buffer.op = event
83
83
  output_buffer
84
84
  end
85
85
 
@@ -92,11 +92,11 @@ module TensorStream
92
92
  assign_v = tensor.inputs[2]
93
93
 
94
94
  # mark variable buffers as dirty
95
- assign.buffer.dirty = true # force buffer copy when variable is read externally
96
- assign_m.buffer.dirty = true # force buffer copy when variable is read externally
97
- assign_v.buffer.dirty = true # force buffer copy when variable is read externally
95
+ assign.container_buffer.dirty = true # force buffer copy when variable is read externally
96
+ assign_m.container_buffer.dirty = true # force buffer copy when variable is read externally
97
+ assign_v.container_buffer.dirty = true # force buffer copy when variable is read externally
98
98
 
99
- output_buffer = assign.buffer
99
+ output_buffer = assign.container_buffer
100
100
 
101
101
  work_group = [output_buffer.total_elements]
102
102
 
@@ -110,13 +110,13 @@ module TensorStream
110
110
  beta1_t.cl_buffer,
111
111
  beta2_t.cl_buffer,
112
112
  epsilon_t.cl_buffer,
113
- assign_m.buffer.cl_buffer,
114
- assign.buffer.cl_buffer,
115
- assign_v.buffer.cl_buffer,
113
+ assign_m.container_buffer.cl_buffer,
114
+ assign.container_buffer.cl_buffer,
115
+ assign_v.container_buffer.cl_buffer,
116
116
  event_wait_list: event_wait_list)
117
117
  output_buffer.op = event
118
- assign_m.buffer.op = event
119
- assign_v.buffer.op = event
118
+ assign_m.container_buffer.op = event
119
+ assign_v.container_buffer.op = event
120
120
  output_buffer
121
121
  end
122
122
 
@@ -126,9 +126,9 @@ module TensorStream
126
126
  assign = tensor.inputs[0] || tensor
127
127
  assign_acc = tensor.inputs[1]
128
128
 
129
- assign.buffer.dirty = true
130
- assign_acc.buffer.dirty = true
131
- output_buffer = assign.buffer
129
+ assign.container_buffer.dirty = true
130
+ assign_acc.container_buffer.dirty = true
131
+ output_buffer = assign.container_buffer
132
132
 
133
133
  work_group = [output_buffer.total_elements]
134
134
 
@@ -138,11 +138,11 @@ module TensorStream
138
138
  work_group,
139
139
  lr.cl_buffer,
140
140
  grad.cl_buffer,
141
- assign.buffer.cl_buffer,
142
- assign_acc.buffer.cl_buffer,
141
+ assign.container_buffer.cl_buffer,
142
+ assign_acc.container_buffer.cl_buffer,
143
143
  event_wait_list: event_wait_list)
144
144
  output_buffer.op = event
145
- assign_acc.buffer.op = event
145
+ assign_acc.container_buffer.op = event
146
146
  output_buffer
147
147
  end
148
148
 
@@ -154,11 +154,11 @@ module TensorStream
154
154
  assign_ms = tensor.inputs[2]
155
155
  assign_mom = tensor.inputs[3]
156
156
 
157
- assign.buffer.dirty = true
158
- assign_mg.buffer.dirty = true
159
- assign_ms.buffer.dirty = true
160
- assign_mom.buffer.dirty = true
161
- output_buffer = assign.buffer
157
+ assign.container_buffer.dirty = true
158
+ assign_mg.container_buffer.dirty = true
159
+ assign_ms.container_buffer.dirty = true
160
+ assign_mom.container_buffer.dirty = true
161
+ output_buffer = assign.container_buffer
162
162
  event_wait_list = build_event_wait_list(inputs)
163
163
  work_group = [output_buffer.total_elements]
164
164
 
@@ -168,30 +168,30 @@ module TensorStream
168
168
  momentum.cl_buffer,
169
169
  epsilon.cl_buffer,
170
170
  grad.cl_buffer,
171
- assign.buffer.cl_buffer,
172
- assign_ms.buffer.cl_buffer,
173
- assign_mg.buffer.cl_buffer,
174
- assign_mom.buffer.cl_buffer,
171
+ assign.container_buffer.cl_buffer,
172
+ assign_ms.container_buffer.cl_buffer,
173
+ assign_mg.container_buffer.cl_buffer,
174
+ assign_mom.container_buffer.cl_buffer,
175
175
  event_wait_list: event_wait_list)
176
176
 
177
177
  output_buffer.op = event
178
- assign_mg.buffer.op = event
179
- assign_ms.buffer.op = event
180
- assign_mom.buffer.op = event
178
+ assign_mg.container_buffer.op = event
179
+ assign_ms.container_buffer.op = event
180
+ assign_mom.container_buffer.op = event
181
181
  output_buffer
182
182
  end
183
183
 
184
- register_op :apply_rms_prop do |context, tensor, inputs|
184
+ register_op :apply_rms_prop do |_context, tensor, inputs|
185
185
  var, ms, mom, lr, rho, momentum, epsilon, grad = inputs
186
186
 
187
187
  assign = tensor.inputs[0]
188
188
  assign_ms = tensor.inputs[1]
189
189
  assign_mom = tensor.inputs[2]
190
190
 
191
- assign.buffer.dirty = true
192
- assign_ms.buffer.dirty = true
193
- assign_mom.buffer.dirty = true
194
- output_buffer = assign.buffer
191
+ assign.container_buffer.dirty = true
192
+ assign_ms.container_buffer.dirty = true
193
+ assign_mom.container_buffer.dirty = true
194
+ output_buffer = assign.container_buffer
195
195
  event_wait_list = build_event_wait_list(inputs)
196
196
  work_group = [output_buffer.total_elements]
197
197
 
@@ -202,14 +202,14 @@ module TensorStream
202
202
  momentum.cl_buffer,
203
203
  epsilon.cl_buffer,
204
204
  grad.cl_buffer,
205
- assign.buffer.cl_buffer,
206
- assign_ms.buffer.cl_buffer,
207
- assign_mom.buffer.cl_buffer,
205
+ assign.container_buffer.cl_buffer,
206
+ assign_ms.container_buffer.cl_buffer,
207
+ assign_mom.container_buffer.cl_buffer,
208
208
  event_wait_list: event_wait_list)
209
209
 
210
210
  output_buffer.op = event
211
- assign_ms.buffer.op = event
212
- assign_mom.buffer.op = event
211
+ assign_ms.container_buffer.op = event
212
+ assign_mom.container_buffer.op = event
213
213
  output_buffer
214
214
  end
215
215
 
@@ -273,7 +273,7 @@ module TensorStream
273
273
  output_buffer_backprop.op = event
274
274
 
275
275
  loss = reduction(context, tensor, output_buffer, rank, :sum)
276
- TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
276
+ TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
277
277
  end
278
278
 
279
279
  register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
@@ -370,6 +370,7 @@ module TensorStream
370
370
  raise TensorStream::ValueError, " Current implementation does not yet support strides in the batch and depth dimensions." if strides[0] != 1 || strides[3] != 1
371
371
 
372
372
  padding_option = tensor.options[:padding]
373
+
373
374
  padding = conv2d_padding_options(padding_option, filter_shape, height, width, height_stride, width_stride)
374
375
  event_wait_list = build_event_wait_list(inputs)
375
376
 
@@ -33,13 +33,18 @@ module TensorStream
33
33
  end
34
34
 
35
35
  if shape.empty?
36
- return buffer.to_s if data_type == :string
37
- return buffer[0] != 0 if data_type == :boolean
38
- return buffer[0]
36
+ return case data_type
37
+ when :string
38
+ buffer.to_s
39
+ when :boolean
40
+ buffer[0] != 0
41
+ else
42
+ buffer[0]
43
+ end
39
44
  end
40
-
41
- result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
42
- data_type == :boolean ? process_function_op(result, ->(a, _b) { a != 0 }) : result
45
+
46
+ result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
47
+ data_type == :boolean ? process_function_op(result) { |a, _b| a != 0 } : result
43
48
  end
44
49
 
45
50
  def self.nil_buffer(owner, name, data_type)
@@ -225,7 +225,6 @@ module TensorStream
225
225
  def prepare_input(tensor, context, options = {})
226
226
  return nil unless tensor
227
227
 
228
- tensor = resolve_placeholder(tensor)
229
228
  if options[:noop]
230
229
  tensor
231
230
  elsif options[:buffer]
@@ -329,30 +328,18 @@ module TensorStream
329
328
  tensor = tensor.call if tensor.is_a?(Proc)
330
329
 
331
330
  child_context = execution_context.dup
332
- res = if tensor.is_a?(Operation)
333
- if !on_same_device?(tensor) # tensor is on another device or evaluator
334
- perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
335
- else
336
- eval_operation(tensor, child_context)
337
- end
338
- elsif tensor.is_a?(Variable)
339
- eval_variable(tensor, child_context)
340
- elsif tensor.is_a?(Placeholder)
341
- resolve_placeholder(tensor, child_context)
331
+ res = if !on_same_device?(tensor) # tensor is on another device or evaluator
332
+ perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
333
+ elsif tensor.is_a?(Operation)
334
+ eval_operation(tensor, child_context)
342
335
  else
343
- eval_tensor(tensor, child_context)
336
+ raise "invalid tensor type!"
344
337
  end
338
+
345
339
  execution_context.deep_merge!(returns: child_context[:returns])
346
340
  res
347
341
  end
348
342
 
349
- def eval_variable(tensor, _child_context)
350
- raise "variable #{tensor.name} not initalized" if tensor.value.nil? && (tensor.buffer.nil? || !tensor.buffer.dirty)
351
-
352
- tensor.buffer = wrap_opencl(tensor, name: tensor.name) if tensor.buffer.nil?
353
- tensor.buffer
354
- end
355
-
356
343
  register_op :no_op do |_context, _tensor, _inputs|
357
344
  end
358
345
 
@@ -396,14 +383,14 @@ module TensorStream
396
383
  end
397
384
 
398
385
  %i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
399
- register_op op do |context, tensor, inputs|
386
+ register_op op do |_context, tensor, inputs|
400
387
  execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], 'cond')
401
388
  end
402
389
  end
403
390
 
404
391
  register_op :where, noop: true do |context, tensor, inputs|
405
- pred = tensor.options[:pred]
406
- execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
392
+ pred = inputs[0]
393
+ execute_cond_func('where', tensor, pred, inputs[1], inputs[2], context)
407
394
  end
408
395
 
409
396
  register_op :check_numerics, noop: true do |context, tensor, inputs|
@@ -455,10 +442,36 @@ module TensorStream
455
442
  nil
456
443
  end
457
444
 
445
+ register_op :const do |_context, tensor, inputs|
446
+ wrap_opencl(tensor.const_value, name: tensor.name, data_type: tensor.data_type)
447
+ end
448
+
458
449
  register_op :size do |_context, tensor, inputs|
459
450
  wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
460
451
  end
461
452
 
453
+ register_op :restore_ts do |context, tensor, inputs|
454
+ inputs = inputs.dup
455
+ filename = inputs.shift
456
+ tensor_names = inputs
457
+
458
+ filename = read_final_result(complete_eval(filename, context))
459
+ tensor_names.map! { |n| read_final_result(complete_eval(n, context)) }
460
+
461
+ input_dump = YAML.safe_load(File.read(filename), [Symbol])
462
+ vars = tensor.graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
463
+
464
+ vars.select! { |v| input_dump['variables'].key?(v.name) && tensor_names.include?(v.name) }
465
+ vars.each do |variable|
466
+ data = TensorStream::Packer.unpack(Zlib::Inflate.inflate(Base64.decode64(input_dump['variables'][variable.name]['data'])), variable.data_type)
467
+ shape = input_dump['variables'][variable.name]['shape']
468
+ variable.buffer = convert_to_opencl(data, shape, data_type: variable.data_type, name: variable.name)
469
+ variable.value = TensorShape.reshape(data, shape)
470
+ end
471
+
472
+ nil
473
+ end
474
+
462
475
  def eval_operation(tensor, child_context)
463
476
  cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
464
477
  return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
@@ -514,7 +527,7 @@ module TensorStream
514
527
  # File.write('/home/jedld/workspace/tensor_stream/samples/error.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
515
528
 
516
529
  # File.write('/Users/josephemmanueldayo/workspace/gradients.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
517
- raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : #{tensor.to_math(true, 1)} defined at #{tensor.source}"
530
+ raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : defined at #{tensor.source}"
518
531
  end
519
532
 
520
533
  def eval_tensor(tensor, child_context)
@@ -539,21 +552,21 @@ module TensorStream
539
552
  assign = tensor.inputs[0] || tensor
540
553
  buffer = complete_eval(b, child_context)
541
554
 
542
- if assign.buffer
543
- event_wait_list = build_event_wait_list([buffer, assign.buffer])
544
- assign.buffer.op = if assign.buffer.cl_buffer != buffer.cl_buffer
545
- _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: event_wait_list)
546
- else
547
- buffer.op
548
- end
555
+ if assign.container_buffer
556
+ event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
557
+ assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
558
+ _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
559
+ else
560
+ buffer.op
561
+ end
549
562
  else
550
563
  value = read_final_result(buffer)
551
- assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
552
- assign.value = value
564
+ assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
565
+ assign.options[:container].value = value
553
566
  end
554
567
 
555
- assign.buffer.dirty = true
556
- assign.buffer
568
+ assign.container_buffer.dirty = true
569
+ assign.container_buffer
557
570
  end
558
571
 
559
572
  def execute_2_operand_func(op_name, tensor, a, b, prog_name = nil)
@@ -572,7 +585,7 @@ module TensorStream
572
585
  [m || 1, n || 1]
573
586
  elsif (b.shape.size == 1) && (result_shape.last == b.shape.last)
574
587
  last_dim = b.shape.last
575
- [result_shape.reduce(:*) / last_dim, last_dim]
588
+ [result_shape.reduce(:*) / last_dim, last_dim]
576
589
  else
577
590
  raise "rank > 2 not supported for now"
578
591
  end
@@ -622,7 +635,7 @@ module TensorStream
622
635
  work_group = if p.shape.size > 2
623
636
  [m, p.shape.reduce(:*) / m]
624
637
  else
625
- [ m || 1, n || 1]
638
+ [m || 1, n || 1]
626
639
  end
627
640
 
628
641
  cl_m = OpenCL::Int1.new(work_group[0])
@@ -1,5 +1,5 @@
1
1
  module TensorStream
2
2
  module Opencl
3
- VERSION = "0.2.4"
3
+ VERSION = "0.2.5"
4
4
  end
5
5
  end
@@ -1,4 +1,5 @@
1
1
  require "tensor_stream/opencl/version"
2
+ require 'tensor_stream'
2
3
  require "tensor_stream/opencl/opencl_evaluator"
3
4
 
4
5
  module TensorStream
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "tensor_stream"
5
+ require 'mnist-learn'
6
+ require 'fileutils'
7
+
8
+ file_path = ARGV[0]
9
+ model_path = ARGV[1]
10
+
11
+ decoded_image = TensorStream.image.decode_png(File.read(file_path), channels: 1)
12
+ target_graph = TensorStream::YamlLoader.new.load_from_file(model_path)
13
+ input = target_graph['Placeholder']
14
+ output = TensorStream.argmax(target_graph['out'], 1)
15
+ sess = TensorStream.session
16
+
17
+ reshaped_image = 255.0.t - decoded_image.reshape([1, 28, 28, 1]).cast(:float32)
18
+ result = sess.run(output, feed_dict: { input => reshaped_image})
19
+
20
+ puts "image is a #{result.first}"
21
+
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "tensor_stream"
5
+ require 'mnist-learn'
6
+ require 'fileutils'
7
+
8
+ mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
9
+
10
+ ts = TensorStream
11
+ test_data = mnist.test.images
12
+ FileUtils.mkdir_p 'test_images'
13
+
14
+ sess = ts.session
15
+
16
+ test_data.each_with_index do |image , index|
17
+ image = 255.t - ts.cast(ts.reshape(image, [28, 28, 1]), :uint8) # reshape image
18
+ encoder = ts.image.encode_png(image)
19
+ blob = sess.run(encoder)
20
+ File.write(File.join('test_images', "#{index}_image.png"), blob)
21
+ end
@@ -0,0 +1,9 @@
1
+ require "bundler/setup"
2
+ require 'tensor_stream'
3
+ require 'mnist-learn'
4
+ require 'csv'
5
+
6
+ # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
7
+ require 'tensor_stream/opencl'
8
+
9
+
@@ -54,18 +54,10 @@ b5 = tf.variable(tf.zeros([10]))
54
54
  x_ = tf.reshape(x, [-1, 784])
55
55
 
56
56
  y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
57
- y1d = tf.nn.dropout(y1, pkeep)
58
-
59
- y2 = tf.nn.relu(tf.matmul(y1d, w2) + b2)
60
- y2d = tf.nn.dropout(y2, pkeep)
61
-
62
- y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
63
- y3d = tf.nn.dropout(y3, pkeep)
64
-
65
- y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
66
- y4d = tf.nn.dropout(y4, pkeep)
67
-
68
- ylogits = tf.matmul(y4d, w5) + b5
57
+ y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
58
+ y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
59
+ y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
60
+ ylogits = tf.matmul(y4, w5) + b5
69
61
 
70
62
  # model
71
63
  y = tf.nn.softmax(ylogits)
@@ -10,6 +10,7 @@ require "bundler/setup"
10
10
  require 'tensor_stream'
11
11
  require 'mnist-learn'
12
12
  require 'pry-byebug'
13
+ require 'csv'
13
14
 
14
15
  # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
15
16
  require 'tensor_stream/opencl'
@@ -21,6 +22,7 @@ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Ope
21
22
  puts "downloading minst data"
22
23
  # Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
23
24
  mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
25
+
24
26
  puts "downloading finished"
25
27
 
26
28
  # neural network structure for this sample:
@@ -85,13 +87,10 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
85
87
  yy = tf.reshape(y3, [-1, 7 * 7 * M])
86
88
  y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
87
89
 
88
- # dropout to prevent overfitting
89
- yy4 = tf.nn.dropout(y4, pkeep)
90
-
91
- ylogits = tf.matmul(yy4, w5) + b5
90
+ ylogits = tf.matmul(y4, w5) + b5
92
91
 
93
92
  # model
94
- y = tf.nn.softmax(ylogits)
93
+ y = tf.nn.softmax(ylogits, name: 'out')
95
94
 
96
95
 
97
96
 
@@ -111,16 +110,21 @@ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
111
110
  lr = 0.0001.t + tf.train.exponential_decay(0.003, step, 2000, 1/Math::E)
112
111
  train_step = TensorStream::Train::AdamOptimizer.new(lr).minimize(cross_entropy)
113
112
 
114
- sess = tf.session
113
+ sess = tf.session(profile_enabled: true)
115
114
  # Add ops to save and restore all the variables.
116
115
 
117
116
  init = tf.global_variables_initializer
118
117
 
119
118
  sess.run(init)
119
+
120
+ #Setup save and restore
121
+ model_save_path = "test_models/mnist_data_3.0"
122
+ saver = tf::Train::Saver.new
123
+ saver.restore(sess, model_save_path)
124
+
120
125
  mnist_train = mnist.train
121
126
  test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
122
127
 
123
-
124
128
  (0..10001).each do |i|
125
129
  # load batch of images and correct answers
126
130
  batch_x, batch_y = mnist_train.next_batch(100)
@@ -130,7 +134,8 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
130
134
  sess.run(train_step, feed_dict: train_data)
131
135
 
132
136
  if (i % 10 == 0)
133
- # File.write("profile.json", TensorStream::ReportTool.profile_for(sess).to_json)
137
+ # result = TensorStream::ReportTool.profile_for(sess)
138
+ # File.write("profile.csv", result.map(&:to_csv).join("\n"))
134
139
  # success? add code to print it
135
140
  a_train, c_train, l = sess.run([accuracy, cross_entropy, lr], feed_dict: { x => batch_x, y_ => batch_y, step => i, pkeep => 1.0})
136
141
  puts "#{i}: accuracy:#{a_train} loss:#{c_train} (lr:#{l})"
@@ -140,6 +145,9 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
140
145
  # success on test data?
141
146
  a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data, pkeep => 1.0)
142
147
  puts("#{i}: ******** test accuracy: #{a_test} test loss: #{c_test}")
148
+
149
+ # save current state of the model
150
+ save_path = saver.save(sess, model_save_path)
143
151
  end
144
152
  end
145
153
 
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
39
39
  spec.add_development_dependency "awesome_print"
40
40
  spec.add_development_dependency "mnist-learn"
41
41
  spec.add_development_dependency "simplecov"
42
- spec.add_dependency "tensor_stream", "~> 0.9.8"
42
+ spec.add_dependency "tensor_stream", "1.0.0-rc1"
43
43
  spec.add_dependency "opencl_ruby_ffi"
44
44
  spec.add_dependency "oily_png"
45
45
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tensor_stream-opencl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Dayo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-11-25 00:00:00.000000000 Z
11
+ date: 2019-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,16 +112,16 @@ dependencies:
112
112
  name: tensor_stream
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - "~>"
115
+ - - '='
116
116
  - !ruby/object:Gem::Version
117
- version: 0.9.8
117
+ version: 1.0.0.pre.rc1
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - "~>"
122
+ - - '='
123
123
  - !ruby/object:Gem::Version
124
- version: 0.9.8
124
+ version: 1.0.0.pre.rc1
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: opencl_ruby_ffi
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -246,6 +246,9 @@ files:
246
246
  - lib/tensor_stream/opencl/opencl_evaluator.rb
247
247
  - lib/tensor_stream/opencl/opencl_template_helper.rb
248
248
  - lib/tensor_stream/opencl/version.rb
249
+ - samples/classify.rb
250
+ - samples/dump_mnist.rb
251
+ - samples/image_sort.rb
249
252
  - samples/iris.data
250
253
  - samples/iris.rb
251
254
  - samples/logistic_regression.rb