tensor_stream-opencl 0.2.1 → 0.2.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 03fcb3bb50485dd601bf17b58f216209c86fb20aeb6c0b61b23144b5d644efaa
-   data.tar.gz: 7b96f90b902ff747b74575be13015e52cfda0f4104273e14eca5bee90fc1a405
+   metadata.gz: 2f7c2e06a5711e3efc8503de82f4c836af70c3b0dfd6ce0f4790f0bb6d3abcb9
+   data.tar.gz: c103f23ba5d27f3a6356ed28b10966b8333f9fb3fabc203924ce357c4c0523c8
  SHA512:
-   metadata.gz: 2916b8a053754bfd58594cef0680d79d12d38332d99d28a3cded71028c58b7a35ec3a6480b6b80f486aa2a59b617f89ee1b534bf00f51343e4827df250ff92f4
-   data.tar.gz: 308153886efa111da2251b31f4a6b5d4ad610336681e3d94b1bb9b055cf8a7e97ad9460271a5ae14e738410a77fc75d8669636732909af2ff574f0e907cad44b
+   metadata.gz: 637ede65bf27b9ce06a755e344e58567c4d1e83e4831115e872d6f2ca0ff778f49f4d4e60af643a920fcf3a1b9033078b0c81f6e6e0f62f2e31f8f9ac4fee89b
+   data.tar.gz: af8482a75b98db484c074c2862d455709ed5563e596819ad445a0c502467c0b5189eef04abbf55060dcbc0640289ce839715b19b3332aa9c21217345726ac3f3
data/.gitignore CHANGED
@@ -8,6 +8,7 @@
  /tmp/
  Gemfile.lock
  *.gem
+ *.ckpt

  # rspec failure tracking
  .rspec_status
data/.rubocop.yml ADDED
@@ -0,0 +1,89 @@
+ AllCops:
+   Exclude:
+     - samples/*
+     - bin/*
+     - spec/**/*
+     - tensor_stream.gemspec
+     - Rakefile
+
+ Naming/AccessorMethodName:
+   Exclude:
+     - lib/tensor_stream/utils.rb
+
+ Style/StringLiterals:
+   Enabled: false
+
+ Layout/TrailingBlankLines:
+   Enabled: false
+
+ Metrics/LineLength:
+   Max: 200
+
+ Metrics/AbcSize:
+   Enabled: false
+
+ Metrics/PerceivedComplexity:
+   Enabled: false
+
+ Metrics/MethodLength:
+   Enabled: false
+
+ Metrics/CyclomaticComplexity:
+   Enabled: false
+
+ Metrics/BlockLength:
+   Exclude:
+     - lib/tensor_stream/math_gradients.rb
+
+ Naming/AccessorMethodName:
+   Exclude:
+     - lib/tensor_stream.rb
+     - lib/tensor_stream/control_flow.rb
+     - lib/tensor_stream/graph.rb
+     - lib/tensor_stream/operation.rb
+
+ Style/Documentation:
+   Exclude:
+     - lib/tensor_stream/version.rb
+     - lib/tensor_stream/trainer.rb
+     - lib/tensor_stream/nn/nn_ops.rb
+     - lib/tensor_stream/evaluator/evaluator.rb
+
+ Lint/UnusedMethodArgument:
+   Exclude:
+     - lib/tensor_stream/train/saver.rb
+     - lib/tensor_stream/ops.rb
+
+ Metrics/ParameterLists:
+   Max: 8
+
+ Style/PerlBackrefs:
+   Enabled: false
+
+ Style/RegexpLiteral:
+   Enabled: false
+
+ Naming/MemoizedInstanceVariableName:
+   Enabled: false
+
+ Metrics/ModuleLength:
+   Max: 200
+
+ Metrics/ClassLength:
+   Max: 250
+   Exclude:
+     - lib/tensor_stream/evaluator/ruby_evaluator.rb
+
+ Naming/VariableNumber:
+   Enabled: false
+
+ Style/DoubleNegation:
+   Enabled: false
+
+ Style/TrailingCommaInHashLiteral:
+   Enabled: false
+
+ Naming/UncommunicativeMethodParamName:
+   Exclude:
+     - lib/tensor_stream/evaluator/ruby_evaluator.rb
+     - lib/tensor_stream/ops.rb
data/lib/tensor_stream/opencl/array_ops.rb CHANGED
@@ -4,6 +4,28 @@ module TensorStream
    module ArrayOps
      def ArrayOps.included(klass)
        klass.class_eval do
+
+         # fast cached 0/1 constant fill
+         register_op %i[zeros ones zeros_like ones_like] do |context, tensor, inputs|
+           shape = if %i[zeros_like ones_like].include?(tensor.operation)
+                     inputs[0].shape
+                   elsif !inputs[0].nil?
+                     read_final_result(complete_eval(inputs[0], context))
+                   else
+                     tensor.shape.shape
+                   end
+           cache_key = "cons_#{tensor.name}_#{tensor.data_type}_#{shape}"
+           @context[:_cache][:_cl_buffers][cache_key] ||= begin
+             buffer = allocate_narray_for_type(tensor.data_type, shape.reduce(:*) || 1)
+             if %i[zeros zeros_like].include?(tensor.operation)
+               buffer.fill!(0)
+             else
+               buffer.fill!(1)
+             end
+             convert_to_opencl(buffer, shape, data_type: tensor.data_type, name: tensor.name)
+           end
+         end
+
          register_op :expand_dims, buffer: true do |_context, tensor, inputs|
            axis = inputs[1].buffer[0]
            shape = inputs[0].shape.dup
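The new zeros/ones handler memoizes one filled buffer per name/dtype/shape key, so repeated evaluations reuse the same device allocation instead of refilling and re-uploading it. A minimal plain-Ruby sketch of that memoization pattern (the Array here is only a stand-in for the gem's NArray-plus-cl_buffer pair):

CONSTANT_CACHE = {}

# one filled buffer per cache key, reused on every subsequent request
def cached_constant(name, data_type, shape, value)
  key = "cons_#{name}_#{data_type}_#{shape}"
  CONSTANT_CACHE[key] ||= Array.new(shape.reduce(:*) || 1, value) # stand-in for NArray + cl_buffer
end

zeros = cached_constant('z1', :float32, [2, 2], 0)
p cached_constant('z1', :float32, [2, 2], 0).equal?(zeros) # => true, same object reused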
@@ -17,8 +39,10 @@ module TensorStream
            shape = inputs[0]
            value = inputs[1]

-           narray_size = shape.buffer.to_a.reduce(:*) || 1
-           cl_buffer = get_cached_buffer(tensor.name, shape.buffer.to_a)
+           fill_shape = shape.nil? ? tensor.shape.shape : shape.buffer.to_a
+           narray_size = fill_shape.reduce(:*) || 1
+
+           cl_buffer = get_cached_buffer(tensor.name, fill_shape)

            buffer = if cl_buffer
                       cl_buffer.buffer
@@ -27,7 +51,7 @@ module TensorStream
                     end

            buffer.fill!(value.buffer[0])
-           convert_to_opencl(buffer, shape.buffer.to_a, data_type: tensor.data_type, name: tensor.name)
+           convert_to_opencl(buffer, fill_shape, data_type: tensor.data_type, name: tensor.name)
          end

          register_op :split do |context, tensor, inputs|
@@ -119,7 +143,7 @@ module TensorStream
              piece_size = shape.reduce(:*) || 1
              work_group = [piece_size]
              cl_offset = OpenCL::Int1.new(offset)
-
+
              _cl_program('split_n', axis: axis,
                                     div: divisors,
                                     mul: multipliers,
@@ -218,7 +242,7 @@ module TensorStream
              shape = shape.map { |s| s == 1 ? nil : s }
            end

-           OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type,
+           OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
                             shape: shape.compact, buffer: arr.buffer,
                             cl_buffer: arr.cl_buffer,
                             op: arr.op)
@@ -350,7 +374,7 @@ module TensorStream
              TensorShape.fix_inferred_elements(new_shape, arr.buffer.size)
            end

-           OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type,
+           OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
                             shape: shape, buffer: arr.buffer,
                             cl_buffer: arr.cl_buffer,
                             op: arr.op)
data/lib/tensor_stream/opencl/kernels/apply_adam.cl CHANGED
@@ -12,9 +12,9 @@
                         __global <%= c_dtype %> *output, __global <%= c_dtype %> *v) {
      // Get the index of the current element to be processed
      const int index = get_global_id(0);
-     <%= c_dtype %> alpha = learning_rate[0] * sqrt(1.0 - beta2_power[0]) / (1.0 - beta1_power[0]);
+     <%= c_dtype %> alpha = learning_rate[0] * sqrt((<%= c_dtype %>)1.0 - beta2_power[0]) / (1.0 - beta1_power[0]);

      momentum[index] += (grad[index] - momentum[index]) * (1.0 - beta1[0]);
      v[index] += (grad[index] * grad[index] - v[index]) * (1.0 - beta2[0]);
-     output[index] -= (momentum[index] * alpha) / ( sqrt(v[index]) + epsilon[0] );
+     output[index] -= (momentum[index] * alpha) / ( sqrt((<%= c_dtype %>)v[index]) + epsilon[0] );
  }
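For reference, this kernel is the element-wise Adam update, with beta1_power/beta2_power holding \beta_1^t and \beta_2^t (a reading of the code above, not taken from the gem's docs); the new casts merely pin sqrt to the kernel's element type so single-precision builds don't promote to double:

\alpha = \eta \, \frac{\sqrt{1 - \beta_2^{\,t}}}{1 - \beta_1^{\,t}}, \qquad
m \leftarrow m + (g - m)(1 - \beta_1), \qquad
v \leftarrow v + (g^2 - v)(1 - \beta_2), \qquad
\theta \leftarrow \theta - \frac{\alpha\, m}{\sqrt{v} + \epsilon}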
data/lib/tensor_stream/opencl/math_ops.rb CHANGED
@@ -80,8 +80,10 @@ module TensorStream

          transpose_a = OpenCL::Int1.new(tensor.options[:transpose_a] ? 1 : 0)
          transpose_b = OpenCL::Int1.new(tensor.options[:transpose_b] ? 1 : 0)
-         event_wait_list = build_event_wait_list(inputs)
+         event_wait_list = build_event_wait_list([a, b])
+
          output_buffer.op = _cl_program('gemm', dtype: dtype).send(:"gemm_#{dtype}", _opencl_queue, result_shape, cl_m, cl_n, cl_k, transpose_a, transpose_b, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
+
          output_buffer
        end

data/lib/tensor_stream/opencl/opencl_buffer.rb CHANGED
@@ -3,15 +3,16 @@ module TensorStream
    class OpenCLBuffer < Buffer
      include ArrayOpsHelper

-     attr_accessor :shape, :buffer, :cl_buffer, :op
+     attr_accessor :shape, :buffer, :cl_buffer, :op, :owner

-     def initialize(data_type:, shape:, buffer:, cl_buffer:, op: nil, name: nil)
+     def initialize(owner, data_type:, shape:, buffer:, cl_buffer:, op: nil, name: nil)
        @data_type = data_type
        @shape = shape
        @buffer = buffer
        @cl_buffer = cl_buffer
        @name = name
        @op = op
+       @owner = owner
      end

      def total_elements
data/lib/tensor_stream/opencl/opencl_evaluator.rb CHANGED
@@ -38,7 +38,8 @@ module TensorStream
    # PURE ruby evaluator used for testing and development
    class OpenclEvaluator < BaseEvaluator
      attr_accessor :retain
-     attr_reader :opencl_device
+     attr_reader :opencl_device, :opencl_context
+     attr_writer :context

      include TensorStream::OpHelper
      include TensorStream::ArrayOpsHelper
@@ -50,14 +51,14 @@ module TensorStream

      def initialize(session, device, thread_pool: nil, log_intermediates: false)
        super
-       _create_opencl_context(device.native_device)
+       _create_opencl_context
        @opencl_device = device.native_device
        create_command_queue
      end

      def self.query_supported_devices
        devices = query_devices_with_score
-       devices.sort { |a| a[1] }.reverse.map do |d|
+       devices.sort { |a, b| a[1] <=> b[1] }.map do |d|
          opencl_to_device(d)
        end
      end
@@ -68,16 +69,16 @@ module TensorStream
        opencl_to_device(platform_devices[[query[1].to_i, platform_devices.size - 1].min])
      end

-     def self.opencl_to_device(d)
-       device = d[0]
-       index = d[3]
+     def self.opencl_to_device(dev)
+       device = dev[0]
+       index = dev[3]
        platform_name = device.platform.name.tr(' ', '_').downcase
        uri = [platform_name, index].join(':')

        device_type = device.type.to_s == 'GPU' ? :gpu : :cpu

-       OpenclDevice.new(uri, device_type, self).tap do |devide|
-         devide.native_device = device
+       OpenclDevice.new(uri, device_type, self).tap do |d|
+         d.native_device = device
        end
      end

@@ -85,14 +86,14 @@ module TensorStream
      # Select the best device available in the system for this evaluator
      def self.default_device
        devices = OpenclEvaluator.query_devices_with_score
-       device = devices.sort { |a| a[1] }.reverse.first
+       device = devices.max { |a, b| a[1] <=> b[1] }
        opencl_to_device(device)
      end

      # opencl evaluator main entrypoint
      def run(tensor, execution_context)
-       result = complete_eval(tensor, execution_context)
-       # puts "wait finish"
+       result = complete_eval(tensor, execution_context)
+       # puts "-------------------wait finish------------------------"
        _opencl_queue.finish
        read_final_result(result)
      end
@@ -115,18 +116,22 @@ module TensorStream
      # buffer comes from non-opencl evaluator
      def convert_from_buffer(tensor, result)
        if result.buffer.is_a?(TensorStream::Evaluator::OutputGroup)
-         converted_outputs = result.buffer.outputs.zip(result.buffer.data_types).map { |output, data_type| convert_to_opencl([output].flatten, shape_eval(output), data_type: data_type, name: tensor.name) }
+         converted_outputs = result.buffer.outputs.zip(result.buffer.data_types).map do |output, data_type|
+           convert_to_opencl([output].flatten, shape_eval(output), data_type: data_type, name: tensor.name)
+         end
          TensorStream::Evaluator::OutputGroup.new(converted_outputs, result.buffer.data_types)
        else
          convert_to_opencl([result.buffer].flatten, shape_eval(result.buffer), data_type: result.data_type, name: tensor.name)
        end
      end

+     # Generate OpenCL instruction to read back from GPU memory to Host memory for a tensor
      def enqueue_buffer_read(tensor, context)
        buffer = _run(tensor, context)
        if buffer.is_a?(Array)
          buffer.collect do |b|
            next b if b.buffer.size.zero?
+
            b.op = _opencl_queue.enqueue_read_buffer(b.cl_buffer, b.buffer, event_wait_list: build_event_wait_list([b]))
            b
          end
@@ -135,6 +140,7 @@ module TensorStream
        return buffer if buffer.nil?
        return [] if buffer.buffer.nil?
        return buffer if buffer.buffer.size.zero?
+
        buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer]))
        buffer
      end
@@ -145,7 +151,7 @@ module TensorStream

        buffer = enqueue_buffer_read(tensor, context)
        events = build_event_wait_list([buffer])
-       # puts "wait #{tensor.name}"
+       # puts "** wait #{tensor.name} **"
        OpenCL.wait_for_events(events) unless events.empty?
        buffer
      end
@@ -154,6 +160,7 @@ module TensorStream
        OpenCL.platforms.flat_map do |p|
          p.devices.select { |d| d.available > 0 }.each_with_index.collect do |d, index|
            score = 0
+
            if d.type.to_s == 'CPU'
              score += 1
            elsif d.type.to_s == 'GPU'
@@ -162,8 +169,7 @@ module TensorStream

            score += 1000 if d.platform.name == 'NVIDIA CUDA'

-           score += d.max_compute_units
-           score += d.max_clock_frequency
+           score += d.max_compute_units * d.max_clock_frequency

            [d, score, p.name, index]
          end
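The score now multiplies compute units by clock frequency instead of adding them, so a wide, moderately clocked GPU outranks a narrow, high-clocked CPU. A quick illustration with made-up device numbers (real values come from d.max_compute_units and d.max_clock_frequency, and GPUs additionally keep the flat platform bonuses above):

gpu = { compute_units: 36, clock_mhz: 1100 }
cpu = { compute_units: 8,  clock_mhz: 3600 }

old_score = ->(d) { d[:compute_units] + d[:clock_mhz] } # 0.2.1 behaviour
new_score = ->(d) { d[:compute_units] * d[:clock_mhz] } # 0.2.2 behaviour

p old: { gpu: old_score.call(gpu), cpu: old_score.call(cpu) } # => {:old=>{:gpu=>1136, :cpu=>3608}}  clock dominates
p new: { gpu: new_score.call(gpu), cpu: new_score.call(cpu) } # => {:new=>{:gpu=>39600, :cpu=>28800}} parallelism counts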
@@ -172,6 +178,31 @@ module TensorStream

      protected

+     ##
+     # called when passing control to another evaluator
+     def perform_transition(tensor, input, next_evaluator, execution_context)
+       if next_evaluator.is_a?(OpenclEvaluator) # OpenCL but different device?
+         # create opencl buffer for this tensor
+         next_evaluator.context = @context
+
+         foreign_buffer = next_evaluator._run(input, execution_context)
+         event_list = build_event_wait_list([foreign_buffer])
+
+         output_buffer = _create_result_buffer(input.data_type, foreign_buffer.shape, "t_#{tensor.name}_#{input.name}")
+         output_buffer.op = if next_evaluator.opencl_context == @opencl_context
+                              _opencl_queue.enqueue_copy_buffer(foreign_buffer.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_list)
+                            else
+                              puts "wait finish transition ** #{input.name} **"
+                              read_event = next_evaluator._opencl_queue.enqueue_read_buffer(foreign_buffer.cl_buffer, output_buffer.buffer, event_wait_list: event_list)
+                              OpenCL.wait_for_events(read_event)
+                              _opencl_queue.enqueue_write_buffer(output_buffer.cl_buffer, output_buffer.buffer)
+                            end
+         output_buffer
+       else
+         super
+       end
+     end
+
      def prepare_input(tensor, context, options = {})
        return nil unless tensor

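perform_transition handles an input produced by a different OpenCL evaluator: when both evaluators share an OpenCL context the buffer is copied device-to-device, otherwise it is read back to host memory and re-written to the destination. A toy model of that decision (plain Ruby stand-ins, not opencl_ruby_ffi calls):

Buffer = Struct.new(:context, :data)

def transfer(src, dst)
  if src.context == dst.context
    dst.data = src.data.dup   # enqueue_copy_buffer: stays on the device side
  else
    host = src.data.dup       # enqueue_read_buffer: device -> host
    dst.data = host           # enqueue_write_buffer: host -> destination device
  end
  dst
end

a = Buffer.new(:ctx1, [1, 2, 3])
p transfer(a, Buffer.new(:ctx1, nil)).data # => [1, 2, 3] via the fast same-context path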
@@ -195,8 +226,19 @@ module TensorStream
        buffer.to_ruby
      end

-     def _create_opencl_context(opencl_device)
-       @opencl_context = OpenCL.create_context(opencl_device)
+     def _create_opencl_context(device = nil)
+       if device.nil?
+         @@global_opencl_context ||= begin
+           all_devices = OpenclEvaluator.query_supported_devices.map(&:native_device)
+           puts "global context created for #{all_devices}"
+           OpenCL.create_context(all_devices)
+         end
+
+         @opencl_context = @@global_opencl_context
+       else
+         puts "context created for #{device.native_device}"
+         @opencl_context = OpenCL.create_context(device.native_device)
+       end
      end

      def create_command_queue
@@ -205,6 +247,7 @@ module TensorStream
        properties = []
        properties << OpenCL::CommandQueue::PROFILING_ENABLE if supported_proprties.include?('PROFILING_ENABLE')
        properties << OpenCL::CommandQueue::OUT_OF_ORDER_EXEC_MODE_ENABLE if supported_proprties.include?('OUT_OF_ORDER_EXEC_MODE_ENABLE')
+       # puts "creating queue with properties #{supported_proprties}"
        @command_queue = _opencl_context.create_command_queue(opencl_device, properties: properties)
      end

@@ -222,28 +265,32 @@ module TensorStream

      def _cl_program(kernel, args = {})
        suffix = args.collect { |k, v| "#{k}.#{escape_arg_content(v)}" }.join('.')
-       @context[:_cache]["_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"] ||= begin
-         file_path = File.join('/tmp', "#{kernel}.#{suffix}.cl")
-         source = if File.exist?(file_path) && ENV['TS_OPENCL_FILE_CACHE']
-                    File.read(file_path)
-                  else
-                    filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
-                    raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
-                    source = File.read(filename)
-                    source = OpenclTemplateHelper.new(source).generate(args)
-                    File.write(file_path, source) if ENV['TS_OPENCL_FILE_CACHE']
-                    source
-                  end
-         program = _opencl_context.create_program_with_source(source)
-         program.build
-       rescue OpenCL::Error::BUILD_PROGRAM_FAILURE => e
-         puts "OpenCL Compile error: #{program.build_log}"
-         raise e
-       end
+       kernel_cache_key = "_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"
+       @context[:_cache][kernel_cache_key] ||=
+         begin
+           # puts "building #{kernel_cache_key}"
+           file_path = File.join('/tmp', "#{kernel}.#{suffix}.cl")
+           source = if File.exist?(file_path) && ENV['TS_OPENCL_FILE_CACHE']
+                      File.read(file_path)
+                    else
+                      filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
+                      raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
+
+                      source = File.read(filename)
+                      source = OpenclTemplateHelper.new(source).generate(args)
+                      File.write(file_path, source) if ENV['TS_OPENCL_FILE_CACHE']
+                      source
+                    end
+           program = _opencl_context.create_program_with_source(source)
+           program.build
+         rescue OpenCL::Error::BUILD_PROGRAM_FAILURE => e
+           puts "OpenCL Compile error: #{program.build_log}"
+           raise e
+         end
      end

      def escape_arg_content(value)
-       return value.tr(' ','_') if value.is_a?(String)
+       return value.tr(' ', '_') if value.is_a?(String)
        return value.join('-') if value.is_a?(Array)

        value
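_cl_program now hoists the cache key into kernel_cache_key and memoizes the built program per kernel-plus-arguments; with TS_OPENCL_FILE_CACHE set, the rendered ERB source is also cached under /tmp. A minimal sketch of the same render-then-memoize flow, assuming only stdlib ERB (the "built(...)" string stands in for the real create_program_with_source/build step):

require 'erb'

KERNEL_CACHE = {}

def cl_program(kernel, args = {})
  key = "#{kernel}.#{args.map { |k, v| "#{k}.#{v}" }.join('.')}"
  KERNEL_CACHE[key] ||= begin
    source = ERB.new('__kernel void <%= name %>_<%= dtype %>() {}')
                .result_with_hash(name: kernel, dtype: args[:dtype])
    "built(#{source})" # stand-in for create_program_with_source(source).build
  end
end

first = cl_program('gemm', dtype: 'float')               # rendered and "built" once
p cl_program('gemm', dtype: 'float').equal?(first)       # => true, served from the cache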
@@ -257,9 +304,8 @@ module TensorStream

        child_context = execution_context.dup
        res = if tensor.is_a?(Operation)
-               if !self.class.ops.include?(tensor.operation.to_sym)
-                 result = @session.delegate_to_evaluator(tensor, @context, execution_context)
-                 convert_from_buffer(tensor, result)
+               if !on_same_device?(tensor) # tensor is on another device or evaluator
+                 perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
                else
                  eval_operation(tensor, child_context)
                end
@@ -295,7 +341,7 @@ module TensorStream

      register_op :identity do |context, tensor, inputs|
        value = inputs[0]
-       buffer = OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type, shape: value.shape, buffer: value.buffer, cl_buffer: value.cl_buffer)
+       buffer = OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type, shape: value.shape, buffer: value.buffer, cl_buffer: value.cl_buffer)
        buffer.op = build_event_wait_list(inputs)
        buffer
      end
@@ -375,6 +421,7 @@ module TensorStream

      register_op :flow_group do |_context, _tensor, inputs|
        events = build_event_wait_list(inputs)
+       # puts "** wait for event flow_group**"
        OpenCL.wait_for_events(events) unless events.empty?
        nil
      end
@@ -387,8 +434,10 @@ module TensorStream
        cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
        return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
        return @context[cache_key] if @context.key?(cache_key)
-       # puts "opencl: #{tensor.name}"
+
+       # puts "opencl eval #{object_id} #{tensor.name}"
        invoke(tensor, child_context).tap do |result|
+         # puts "result done opencl #{object_id}: #{tensor.name}"
          if tensor.breakpoint
            a = resolve_placeholder(tensor.inputs[0], child_context) if tensor.inputs && tensor.inputs[0]
            b = resolve_placeholder(tensor.inputs[1], child_context) if tensor.inputs && tensor.inputs[1]
@@ -603,6 +652,7 @@ module TensorStream
      end

      def convert_to_opencl(value, shape, data_type: nil, name: nil)
+       # puts "convert_to_opencl called for #{name}"
        value = [value] if !value.is_a?(Array) && !value.is_a?(NArray)

        cache_key = "_cl_object_#{name}:#{shape.join('_')}:#{object_id}"
@@ -630,7 +680,7 @@ module TensorStream
                        _opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
                      end

-         @context[:_cache][cache_key] = OpenCLBuffer.new(name: name, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
+         @context[:_cache][cache_key] = OpenCLBuffer.new(self, name: name, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
        end
        if data_type == :string
          value[0].each_byte.with_index do |c, index|
@@ -664,15 +714,15 @@ module TensorStream

      def allocate_narray_for_type(data_type, narray_size)
        case data_type
-       when :float, :float32
+       when :float, :float32, :float16
          NArray.sfloat(narray_size)
        when :float64
          NArray.float(narray_size)
-       when :int, :int32, :int64
+       when :int, :int32, :int64, :uint64, :uint32 # NArray does not have 64 bit int types
          NArray.int(narray_size)
-       when :int16
+       when :int16, :uint16
          NArray.sint(narray_size)
-       when :uint8
+       when :uint8, :int8
          NArray.byte(narray_size)
        when :boolean
          NArray.byte(narray_size)
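The widened dtype mapping stores unsigned and half-precision values in the closest type the narray gem offers; as the inline comment notes, NArray has no native unsigned or 64-bit integer arrays, so :int64/:uint64 land (lossily) in 32-bit ints. The element sizes behind the mapping can be checked directly (requires the narray gem):

require 'narray'

puts NArray.sfloat(1).element_size # => 4 bytes; backs :float, :float32 and now :float16
puts NArray.int(1).element_size    # => 4 bytes; backs :int32 and (lossily) :int64/:uint64/:uint32
puts NArray.sint(1).element_size   # => 2 bytes; backs :int16/:uint16
puts NArray.byte(1).element_size   # => 1 byte;  backs :uint8/:int8 and :boolean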
@@ -686,12 +736,14 @@ module TensorStream
      end

      def _create_result_buffer(data_type, shape, name)
-       return OpenCLBuffer.new(name: name, data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
-       @context[:_cache][:_cl_buffers]["_result_#{name}_#{shape.join('_')}:#{object_id}"] ||= begin
+       return OpenCLBuffer.new(self, name: name, data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
+       cache_key = "_result_#{name}_#{shape.join('_')}:#{object_id}"
+       @context[:_cache][:_cl_buffers][cache_key] ||= begin
+         # puts "create result buffer #{cache_key}"
          size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
          buffer = allocate_narray_for_type(data_type, size)
          cl_buffer = _opencl_context.create_buffer(buffer.size * buffer.element_size)
-         OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
+         OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
        end
      end

@@ -706,7 +758,7 @@ module TensorStream
          start = index * buffer.size * buffer.element_size
          region = OpenCL::BufferRegion::new(start, buffer.size * buffer.element_size)
          cl_buffer = parent_buffer.cl_buffer.create_sub_buffer(OpenCL::BUFFER_CREATE_TYPE_REGION, region)
-         OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
+         OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
        else
          _create_result_buffer(tensor.data_type, shape, name)
        end
@@ -728,7 +780,7 @@ module TensorStream

      # create sub buffers of different sizes
      def _create_variable_result_sub_buffer(parent_buffer, index, start, region_size_in_bytes, data_type, shape, name)
-       cache_key ="_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
+       cache_key = "_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
        @context[:_cache][:_cl_buffers][cache_key] ||= begin
          size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
          buffer = allocate_narray_for_type(data_type, size)
@@ -736,7 +788,7 @@ module TensorStream
          if parent_buffer.cl_buffer.associated_memobject.nil?
            region = OpenCL::BufferRegion::new(start, region_size_in_bytes)
            cl_buffer = parent_buffer.cl_buffer.create_sub_buffer(OpenCL::BUFFER_CREATE_TYPE_REGION, region)
-           OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: "#{name}/sub")
+           OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: "#{name}/sub")
          else
            _create_result_buffer(tensor.data_type, shape, name)
          end
@@ -806,6 +858,7 @@ module TensorStream
          convert_to_opencl(red, [], data_type: tensor.data_type, name: tensor.name)
        else
          return input if input.shape.empty?
+
          value = input.buffer.reshape(*input.shape.reverse)
          rank = input.shape.size - 1

@@ -862,17 +915,15 @@ module TensorStream

      def resolve_placeholder(placeholder, _execution_context = {})
        return nil if placeholder.nil?
+       return placeholder unless placeholder.is_a?(Placeholder)

-       var = if placeholder.is_a?(Placeholder)
-               @context[placeholder.name.to_sym].tap do |c|
-                 raise "missing placeholder #{placeholder.name}" if c.nil?
-               end
-             else
-               placeholder
-             end
+       var = @context[placeholder.name.to_sym]
+       raise "missing placeholder #{placeholder.name}" if var.nil?

-       return convert_to_opencl(var, shape_eval(var), data_type: placeholder.data_type, name: placeholder.name) unless var.is_a?(Tensor)
-       Tensor.cast_dtype(var, placeholder.data_type)
+       cache_key = "#{placeholder.graph.object_id}_opencl_#{placeholder.name}_p:#{object_id}"
+       @context[cache_key] ||= begin
+         convert_to_opencl(var, shape_eval(var), data_type: placeholder.data_type, name: placeholder.name) unless var.is_a?(Tensor)
+       end
      end

      def all_true?(arr)
data/lib/tensor_stream/opencl/opencl_template_helper.rb CHANGED
@@ -32,10 +32,18 @@ class OpenclTemplateHelper
      case dtype.to_s
      when 'float64'
        'double'
-     when 'float32', 'float'
+     when 'float32', 'float', 'float16'
        'float'
+     when 'uint32'
+       'uint'
+     when 'int64'
+       'int' # 'long' - NArray does not support 64bit int types
+     when 'uint64'
+       'uint' # 'ulong' - NArray does not support 64bit int types
      when 'int32', 'int'
        'int'
+     when 'uint16'
+       'ushort'
      when 'int16'
        'short'
      when 'uint8'
@@ -51,10 +59,12 @@ class OpenclTemplateHelper
      case dtype.to_s
      when 'float64'
        'DBL_MIN'
-     when 'float32', 'float'
+     when 'float32', 'float', 'float16'
        'FLT_MIN'
      when 'int32', 'int'
        'INT_MIN'
+     when 'uint32', 'uint16'
+       '0'
      when 'int16'
        'SHRT_MIN'
      when 'int8'
data/lib/tensor_stream/opencl/version.rb CHANGED
@@ -1,5 +1,5 @@
  module TensorStream
    module Opencl
-     VERSION = "0.2.1"
+     VERSION = "0.2.2"
    end
  end
data/samples/iris.rb CHANGED
@@ -48,8 +48,6 @@ x_test.each_with_index do |x, index|
    validation_cases << [x, y_test[index]]
  end

-
-
  def init_weights(shape)
    # Weight initialization
    weights = TensorStream.random_normal(shape, stddev: 0.1)
data/samples/mnist_data_2.1.rb ADDED
@@ -0,0 +1,99 @@
+ # A ruby port of the example code discussed by Martin Gorner in
+ # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)""
+ #
+ # https://www.youtube.com/watch?v=u4alGiomYP4
+ #
+ # Requirements:
+ #   mnist-learn gem
+ #   opencl_ruby_ffi gem
+ require "bundler/setup"
+ require 'tensor_stream'
+ require 'mnist-learn'
+ require 'pry-byebug'
+
+ # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
+ require 'tensor_stream/opencl'
+
+ tf = TensorStream
+
+ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Opencl::VERSION}"
+ tf.set_random_seed(0)
+
+ # Import MNIST data
+ puts "downloading minst data"
+ mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
+ puts "downloading finished"
+
+ x = tf.placeholder(:float32, shape: [nil, 784])
+
+ K = 200
+ L = 100
+ M = 60
+ N = 30
+
+
+ w1 = tf.variable(tf.random_normal([784, K]))
+ b1 = tf.variable(tf.zeros([K]))
+
+ w2 = tf.variable(tf.random_normal([K, L]))
+ b2 = tf.variable(tf.zeros([L]))
+
+ w3 = tf.variable(tf.random_normal([L, M]))
+ b3 = tf.variable(tf.zeros([M]))
+
+ w4 = tf.variable(tf.random_normal([M, N]))
+ b4 = tf.variable(tf.zeros([N]))
+
+ w5 = tf.variable(tf.random_normal([N, 10]))
+ b5 = tf.variable(tf.zeros([10]))
+
+ x_ = tf.reshape(x, [-1, 784])
+
+ y1 = tf.sigmoid(tf.matmul(x_, w1) + b1)
+ y2 = tf.sigmoid(tf.matmul(y1, w2) + b2)
+ y3 = tf.sigmoid(tf.matmul(y2, w3) + b3)
+ y4 = tf.sigmoid(tf.matmul(y3, w4) + b4)
+ ylogits = tf.matmul(y4, w5) + b5
+
+ # model
+ y = tf.nn.softmax(ylogits)
+
+ y_ = tf.placeholder(:float32, shape: [nil, 10])
+
+ # cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
+ # TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
+ # problems with log(0) which is NaN
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits: ylogits, labels: y_)
+ cross_entropy = tf.reduce_mean(cross_entropy)*100
+
+ is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
+
+ # training step, learning rate = 0.003
+ learning_rate = 0.003
+ train_step = TensorStream::Train::AdamOptimizer.new(learning_rate).minimize(cross_entropy)
+
+ sess = tf.session
+ init = tf.global_variables_initializer
+ sess.run(init)
+
+ mnist_train = mnist.train
+ test_data = { x => mnist.test.images, y_ => mnist.test.labels }
+
+ (0..10000).each do |i|
+   # load batch of images and correct answers
+   batch_x, batch_y = mnist_train.next_batch(100)
+   train_data = { x => batch_x, y_ => batch_y }
+
+   # train
+   sess.run(train_step, feed_dict: train_data)
+   if (i % 50 == 0)
+     # success? add code to print it
+     a_train, c_train = sess.run([accuracy, cross_entropy], feed_dict: train_data)
+
+     # success on test data?
+     a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data)
+     puts "#{i} train accuracy #{a_train}, error #{c_train} test accuracy #{a_test}, error #{c_test}"
+   end
+ end
+
data/samples/mnist_data_2.2.rb ADDED
@@ -0,0 +1,98 @@
+ # A ruby port of the example code discussed by Martin Gorner in
+ # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)""
+ #
+ # https://www.youtube.com/watch?v=u4alGiomYP4
+ #
+ # Requirements:
+ #   mnist-learn gem
+ #   opencl_ruby_ffi gem
+ require "bundler/setup"
+ require 'tensor_stream'
+ require 'mnist-learn'
+ require 'pry-byebug'
+
+ # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
+ require 'tensor_stream/opencl'
+
+ tf = TensorStream
+
+ # Import MNIST data
+ puts "downloading minst data"
+ mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
+ puts "downloading finished"
+
+ x = tf.placeholder(:float32, shape: [nil, 784])
+
+ K = 200
+ L = 100
+ M = 60
+ N = 30
+
+
+ w1 = tf.variable(tf.random_normal([784, K]))
+ b1 = tf.variable(tf.zeros([K]))
+
+ w2 = tf.variable(tf.random_normal([K, L]))
+ b2 = tf.variable(tf.zeros([L]))
+
+ w3 = tf.variable(tf.random_normal([L, M]))
+ b3 = tf.variable(tf.zeros([M]))
+
+ w4 = tf.variable(tf.random_normal([M, N]))
+ b4 = tf.variable(tf.zeros([N]))
+
+ w5 = tf.variable(tf.random_normal([N, 10]))
+ b5 = tf.variable(tf.zeros([10]))
+
+ x_ = tf.reshape(x, [-1, 784])
+
+ y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
+ y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
+ y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
+ y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
+ ylogits = tf.matmul(y4, w5) + b5
+
+ # model
+ y = tf.nn.softmax(ylogits)
+
+ y_ = tf.placeholder(:float32, shape: [nil, 10])
+
+ # cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
+ # TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
+ # problems with log(0) which is NaN
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits: ylogits, labels: y_)
+ cross_entropy = tf.reduce_mean(cross_entropy)*100
+
+ is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
+
+ # training step, learning rate = 0.003
+ learning_rate = 0.003
+ train_step = TensorStream::Train::AdamOptimizer.new(learning_rate).minimize(cross_entropy)
+
+ sess = tf.session
+ # Add ops to save and restore all the variables.
+ saver = tf::Train::Saver.new
+ init = tf.global_variables_initializer
+
+ sess.run(init)
+ mnist_train = mnist.train
+ test_data = { x => mnist.test.images, y_ => mnist.test.labels }
+
+ (0..1000).each do |i|
+   # load batch of images and correct answers
+   batch_x, batch_y = mnist_train.next_batch(100)
+   train_data = { x => batch_x, y_ => batch_y }
+
+   # train
+   sess.run(train_step, feed_dict: train_data)
+   if (i % 50 == 0)
+     # success? add code to print it
+     a_train, c_train = sess.run([accuracy, cross_entropy], feed_dict: train_data)
+
+     # success on test data?
+     a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data)
+     puts "#{i} train accuracy #{a_train}, error #{c_train} test accuracy #{a_test}, error #{c_test}"
+   end
+ end
+
data/samples/multigpu.rb CHANGED
@@ -11,7 +11,6 @@ DIMEN = 1024
  A = ts.random_uniform([DIMEN, DIMEN]).eval
  B = ts.random_uniform([DIMEN, DIMEN]).eval

-
  # Create a graph to store results
  c1 = []
  c2 = []
@@ -35,17 +34,24 @@ sum = ts.device('/device:GPU:0') do
    ts.add_n(c1)
  end

- t1_1 = Time.now.to_i
+ t1_1 = nil
  t2_1 = nil
-
- ts.session(log_device_placement: true) do |sess|
+ puts "===================== starting single GPU test ================"
+ ts.session(log_device_placement: true, profile_enabled: true) do |sess|
+   puts "-- warmup ---"
+   sess.run(sum, feed_dict: { a => A, b => B}) # warmup
+   puts "-- warmup ---"
+   time = Time.now
+   t1_1 = time.to_i * (10 ** 9) + time.nsec
    sess.run(sum, feed_dict: { a => A, b => B})
-   t2_1 = Time.now.to_i
+   time = Time.now
+   t2_1 = time.to_i * (10 ** 9) + time.nsec
  end
-
+ puts "===================== end single GPU test ================"
+ puts "===================== MULTI GPU text ================"
  # Multi GPU computing
  # GPU:0 computes A^n
- ts.device('/device:GPU:1') do
+ ts.device('/device:GPU:0') do
    a = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
    c2 << matpow(a, n)
  end
@@ -56,18 +62,26 @@ ts.device('/device:GPU:1') do
    c2 << matpow(b, n)
  end

- ts.device('/device:GPU:1') do
+ ts.device('/device:GPU:0') do
    sum = ts.add_n(c2) #Addition of all elements in c2, i.e. A^n + B^n
  end

- t1_2 = Time.now.to_i
+ t1_2 = nil
  t2_2 = nil
- ts.session(log_device_placement:true) do |sess|
+
+ ts.session(log_device_placement: true, profile_enabled: true) do |sess|
    # Run the op.
+   puts "-- warmup ---"
+   sess.run(sum, feed_dict: {a => A, b => B}) # warm up
+   puts "-- warmup ---"
+   time = Time.now
+   t1_2 = time.to_i * (10 ** 9) + time.nsec
+   puts "================ starting multiGPU test ==============="
    sess.run(sum, feed_dict: {a => A, b => B})
-   t2_2 = Time.now.to_i
+   time = Time.now
+   t2_2 = time.to_i * (10 ** 9) + time.nsec
  end


- print("Single GPU computation time: " + (t2_1-t1_1).to_s)
- print("Multi GPU computation time: " + (t2_2-t1_2).to_s)
+ puts("Single GPU computation time: " + ((t2_1-t1_1)/ 1000000.to_f).to_s)
+ puts("Multi GPU computation time: " + ((t2_2-t1_2)/ 1000000.to_f).to_s)
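The sample now builds nanosecond timestamps from Time.now and divides the difference by 1000000, so it reports milliseconds rather than whole seconds, and each session gets a warmup run so kernel compilation isn't timed. A slightly simpler equivalent (not what the sample uses) is Ruby's monotonic clock, which is also immune to wall-clock adjustments:

t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
# ... run the session ...
t2 = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
puts "elapsed: #{(t2 - t1) / 1_000_000.0} ms"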
data/tensor_stream-opencl.gemspec CHANGED
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
    spec.add_development_dependency "pry-byebug"
    spec.add_development_dependency "awesome_print"
    spec.add_development_dependency "mnist-learn"
-   spec.add_dependency "tensor_stream", "~> 0.9.0"
+   spec.add_dependency "tensor_stream", "~> 0.9.2"
    spec.add_dependency "opencl_ruby_ffi"
    spec.add_dependency "oily_png"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: tensor_stream-opencl
  version: !ruby/object:Gem::Version
-   version: 0.2.1
+   version: 0.2.2
  platform: ruby
  authors:
  - Joseph Dayo
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-10-08 00:00:00.000000000 Z
+ date: 2018-10-21 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
@@ -100,14 +100,14 @@ dependencies:
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: 0.9.0
+         version: 0.9.2
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: 0.9.0
+         version: 0.9.2
  - !ruby/object:Gem::Dependency
    name: opencl_ruby_ffi
    requirement: !ruby/object:Gem::Requirement
@@ -145,10 +145,10 @@ extra_rdoc_files: []
  files:
  - ".gitignore"
  - ".rspec"
+ - ".rubocop.yml"
  - ".travis.yml"
  - CODE_OF_CONDUCT.md
  - Gemfile
- - Gemfile.lock
  - LICENSE.txt
  - README.md
  - Rakefile
@@ -226,7 +226,8 @@ files:
  - lib/tensor_stream/opencl/version.rb
  - samples/iris.data
  - samples/iris.rb
- - samples/mnist_data.rb
+ - samples/mnist_data_2.1.rb
+ - samples/mnist_data_2.2.rb
  - samples/multigpu.rb
  - samples/nearest_neighbor.rb
  - samples/rnn.rb
data/Gemfile.lock DELETED
@@ -1,70 +0,0 @@
- PATH
-   remote: .
-   specs:
-     tensor_stream-opencl (0.2.1)
-       oily_png
-       opencl_ruby_ffi
-       tensor_stream (~> 0.9.0)
-
- GEM
-   remote: https://rubygems.org/
-   specs:
-     awesome_print (1.8.0)
-     byebug (10.0.2)
-     chunky_png (1.3.10)
-     coderay (1.1.2)
-     concurrent-ruby (1.0.5)
-     deep_merge (1.2.1)
-     diff-lcs (1.3)
-     ffi (1.9.25)
-     method_source (0.9.0)
-     mnist-learn (0.1.1)
-     narray (0.6.1.2)
-     narray_ffi (1.4.4)
-       ffi (~> 1.9, >= 1.9.3)
-       narray (~> 0.6, >= 0.6.0.8)
-     oily_png (1.2.1)
-       chunky_png (~> 1.3.7)
-     opencl_ruby_ffi (1.3.4)
-       ffi (~> 1.9, >= 1.9.3)
-       narray (~> 0.6, >= 0.6.0.8)
-       narray_ffi (~> 1.0, >= 1.0.0)
-     pry (0.11.3)
-       coderay (~> 1.1.0)
-       method_source (~> 0.9.0)
-     pry-byebug (3.6.0)
-       byebug (~> 10.0)
-       pry (~> 0.10)
-     rake (10.5.0)
-     rspec (3.8.0)
-       rspec-core (~> 3.8.0)
-       rspec-expectations (~> 3.8.0)
-       rspec-mocks (~> 3.8.0)
-     rspec-core (3.8.0)
-       rspec-support (~> 3.8.0)
-     rspec-expectations (3.8.1)
-       diff-lcs (>= 1.2.0, < 2.0)
-       rspec-support (~> 3.8.0)
-     rspec-mocks (3.8.0)
-       diff-lcs (>= 1.2.0, < 2.0)
-       rspec-support (~> 3.8.0)
-     rspec-support (3.8.0)
-     tensor_stream (0.9.0)
-       chunky_png
-       concurrent-ruby
-       deep_merge
-
- PLATFORMS
-   ruby
-
- DEPENDENCIES
-   awesome_print
-   bundler (~> 1.16)
-   mnist-learn
-   pry-byebug
-   rake (~> 10.0)
-   rspec (~> 3.0)
-   tensor_stream-opencl!
-
- BUNDLED WITH
-    1.16.2
data/samples/mnist_data.rb DELETED
@@ -1,65 +0,0 @@
- # A ruby port of the example code discussed by Martin Gorner in
- # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)""
- #
- # https://www.youtube.com/watch?v=u4alGiomYP4
- #
- # Requirements:
- #   mnist-learn gem
- #   opencl_ruby_ffi gem
- require "bundler/setup"
- require 'tensor_stream'
- require 'mnist-learn'
-
- # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
- # require 'tensor_stream/opencl'
-
- tf = TensorStream
-
- # Import MNIST data
- puts "downloading minst data"
- mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
- puts "downloading finished"
-
- x = tf.placeholder(:float32, shape: [nil, 784])
- w = tf.variable(tf.zeros([784, 10]))
- b = tf.variable(tf.zeros([10]))
-
-
-
- # model
- y = tf.nn.softmax(tf.matmul(tf.reshape(x, [-1, 784]), w) + b)
-
- y_ = tf.placeholder(:float32, shape: [nil, 10])
-
- # loss function
- cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
-
- is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
- accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
-
- optimizer = TensorStream::Train::AdamOptimizer.new
- train_step = optimizer.minimize(cross_entropy)
-
- sess = tf.session
- init = tf.global_variables_initializer
- sess.run(init)
-
- (0...1000).each do |i|
-   # load batch of images and correct answers
-   batch_x, batch_y = mnist.train.next_batch(100)
-   train_data = { x => batch_x, y_ => batch_y }
-
-   # train
-   sess.run(train_step, feed_dict: train_data)
-   if (i % 10 == 0)
-     # success? add code to print it
-     a, c = sess.run([accuracy, cross_entropy], feed_dict: train_data)
-     puts "#{i} train accuracy #{a}, error #{c}"
-
-     # success on test data?
-     test_data = { x => mnist.test.images, y_ => mnist.test.labels }
-     a, c = sess.run([accuracy, cross_entropy], feed_dict: test_data)
-     puts " test accuracy #{a}, error #{c}"
-   end
- end
-