tensor_stream-opencl 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 03fcb3bb50485dd601bf17b58f216209c86fb20aeb6c0b61b23144b5d644efaa
4
- data.tar.gz: 7b96f90b902ff747b74575be13015e52cfda0f4104273e14eca5bee90fc1a405
3
+ metadata.gz: 2f7c2e06a5711e3efc8503de82f4c836af70c3b0dfd6ce0f4790f0bb6d3abcb9
4
+ data.tar.gz: c103f23ba5d27f3a6356ed28b10966b8333f9fb3fabc203924ce357c4c0523c8
5
5
  SHA512:
6
- metadata.gz: 2916b8a053754bfd58594cef0680d79d12d38332d99d28a3cded71028c58b7a35ec3a6480b6b80f486aa2a59b617f89ee1b534bf00f51343e4827df250ff92f4
7
- data.tar.gz: 308153886efa111da2251b31f4a6b5d4ad610336681e3d94b1bb9b055cf8a7e97ad9460271a5ae14e738410a77fc75d8669636732909af2ff574f0e907cad44b
6
+ metadata.gz: 637ede65bf27b9ce06a755e344e58567c4d1e83e4831115e872d6f2ca0ff778f49f4d4e60af643a920fcf3a1b9033078b0c81f6e6e0f62f2e31f8f9ac4fee89b
7
+ data.tar.gz: af8482a75b98db484c074c2862d455709ed5563e596819ad445a0c502467c0b5189eef04abbf55060dcbc0640289ce839715b19b3332aa9c21217345726ac3f3
data/.gitignore CHANGED
@@ -8,6 +8,7 @@
8
8
  /tmp/
9
9
  Gemfile.lock
10
10
  *.gem
11
+ *.ckpt
11
12
 
12
13
  # rspec failure tracking
13
14
  .rspec_status
data/.rubocop.yml ADDED
@@ -0,0 +1,89 @@
1
+ AllCops:
2
+ Exclude:
3
+ - samples/*
4
+ - bin/*
5
+ - spec/**/*
6
+ - tensor_stream.gemspec
7
+ - Rakefile
8
+
9
+ Naming/AccessorMethodName:
10
+ Exclude:
11
+ - lib/tensor_stream/utils.rb
12
+
13
+ Style/StringLiterals:
14
+ Enabled: false
15
+
16
+ Layout/TrailingBlankLines:
17
+ Enabled: false
18
+
19
+ Metrics/LineLength:
20
+ Max: 200
21
+
22
+ Metrics/AbcSize:
23
+ Enabled: false
24
+
25
+ Metrics/PerceivedComplexity:
26
+ Enabled: false
27
+
28
+ Metrics/MethodLength:
29
+ Enabled: false
30
+
31
+ Metrics/CyclomaticComplexity:
32
+ Enabled: false
33
+
34
+ Metrics/BlockLength:
35
+ Exclude:
36
+ - lib/tensor_stream/math_gradients.rb
37
+
38
+ Naming/AccessorMethodName:
39
+ Exclude:
40
+ - lib/tensor_stream.rb
41
+ - lib/tensor_stream/control_flow.rb
42
+ - lib/tensor_stream/graph.rb
43
+ - lib/tensor_stream/operation.rb
44
+
45
+ Style/Documentation:
46
+ Exclude:
47
+ - lib/tensor_stream/version.rb
48
+ - lib/tensor_stream/trainer.rb
49
+ - lib/tensor_stream/nn/nn_ops.rb
50
+ - lib/tensor_stream/evaluator/evaluator.rb
51
+
52
+ Lint/UnusedMethodArgument:
53
+ Exclude:
54
+ - lib/tensor_stream/train/saver.rb
55
+ - lib/tensor_stream/ops.rb
56
+
57
+ Metrics/ParameterLists:
58
+ Max: 8
59
+
60
+ Style/PerlBackrefs:
61
+ Enabled: false
62
+
63
+ Style/RegexpLiteral:
64
+ Enabled: false
65
+
66
+ Naming/MemoizedInstanceVariableName:
67
+ Enabled: false
68
+
69
+ Metrics/ModuleLength:
70
+ Max: 200
71
+
72
+ Metrics/ClassLength:
73
+ Max: 250
74
+ Exclude:
75
+ - lib/tensor_stream/evaluator/ruby_evaluator.rb
76
+
77
+ Naming/VariableNumber:
78
+ Enabled: false
79
+
80
+ Style/DoubleNegation:
81
+ Enabled: false
82
+
83
+ Style/TrailingCommaInHashLiteral:
84
+ Enabled: false
85
+
86
+ Naming/UncommunicativeMethodParamName:
87
+ Exclude:
88
+ - lib/tensor_stream/evaluator/ruby_evaluator.rb
89
+ - lib/tensor_stream/ops.rb
@@ -4,6 +4,28 @@ module TensorStream
4
4
  module ArrayOps
5
5
  def ArrayOps.included(klass)
6
6
  klass.class_eval do
7
+
8
+ #fast cached 0/1 constant fill
9
+ register_op %i[zeros ones zeros_like ones_like] do |context, tensor, inputs|
10
+ shape = if %i[zeros_like ones_like].include?(tensor.operation)
11
+ inputs[0].shape
12
+ elsif !inputs[0].nil?
13
+ read_final_result(complete_eval(inputs[0], context))
14
+ else
15
+ tensor.shape.shape
16
+ end
17
+ cache_key = "cons_#{tensor.name}_#{tensor.data_type}_#{shape}"
18
+ @context[:_cache][:_cl_buffers][cache_key] ||= begin
19
+ buffer = allocate_narray_for_type(tensor.data_type, shape.reduce(:*) || 1)
20
+ if %i[zeros zeros_like].include?(tensor.operation)
21
+ buffer.fill!(0)
22
+ else
23
+ buffer.fill!(1)
24
+ end
25
+ convert_to_opencl(buffer, shape, data_type: tensor.data_type, name: tensor.name)
26
+ end
27
+ end
28
+
7
29
  register_op :expand_dims, buffer: true do |_context, tensor, inputs|
8
30
  axis = inputs[1].buffer[0]
9
31
  shape = inputs[0].shape.dup
@@ -17,8 +39,10 @@ module TensorStream
17
39
  shape = inputs[0]
18
40
  value = inputs[1]
19
41
 
20
- narray_size = shape.buffer.to_a.reduce(:*) || 1
21
- cl_buffer = get_cached_buffer(tensor.name, shape.buffer.to_a)
42
+ fill_shape = shape.nil? ? tensor.shape.shape : shape.buffer.to_a
43
+ narray_size = fill_shape.reduce(:*) || 1
44
+
45
+ cl_buffer = get_cached_buffer(tensor.name, fill_shape)
22
46
 
23
47
  buffer = if cl_buffer
24
48
  cl_buffer.buffer
@@ -27,7 +51,7 @@ module TensorStream
27
51
  end
28
52
 
29
53
  buffer.fill!(value.buffer[0])
30
- convert_to_opencl(buffer, shape.buffer.to_a, data_type: tensor.data_type, name: tensor.name)
54
+ convert_to_opencl(buffer, fill_shape, data_type: tensor.data_type, name: tensor.name)
31
55
  end
32
56
 
33
57
  register_op :split do |context, tensor, inputs|
@@ -119,7 +143,7 @@ module TensorStream
119
143
  piece_size = shape.reduce(:*) || 1
120
144
  work_group = [piece_size]
121
145
  cl_offset = OpenCL::Int1.new(offset)
122
-
146
+
123
147
  _cl_program('split_n', axis: axis,
124
148
  div: divisors,
125
149
  mul: multipliers,
@@ -218,7 +242,7 @@ module TensorStream
218
242
  shape = shape.map { |s| s == 1 ? nil : s }
219
243
  end
220
244
 
221
- OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type,
245
+ OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
222
246
  shape: shape.compact, buffer: arr.buffer,
223
247
  cl_buffer: arr.cl_buffer,
224
248
  op: arr.op)
@@ -350,7 +374,7 @@ module TensorStream
350
374
  TensorShape.fix_inferred_elements(new_shape, arr.buffer.size)
351
375
  end
352
376
 
353
- OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type,
377
+ OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
354
378
  shape: shape, buffer: arr.buffer,
355
379
  cl_buffer: arr.cl_buffer,
356
380
  op: arr.op)
@@ -12,9 +12,9 @@
12
12
  __global <%= c_dtype %> *output, __global <%= c_dtype %> *v) {
13
13
  // Get the index of the current element to be processed
14
14
  const int index = get_global_id(0);
15
- <%= c_dtype %> alpha = learning_rate[0] * sqrt(1.0 - beta2_power[0]) / (1.0 - beta1_power[0]);
15
+ <%= c_dtype %> alpha = learning_rate[0] * sqrt((<%= c_dtype %>)1.0 - beta2_power[0]) / (1.0 - beta1_power[0]);
16
16
 
17
17
  momentum[index] += (grad[index] - momentum[index]) * (1.0 - beta1[0]);
18
18
  v[index] += (grad[index] * grad[index] - v[index]) * (1.0 - beta2[0]);
19
- output[index] -= (momentum[index] * alpha) / ( sqrt(v[index]) + epsilon[0] );
19
+ output[index] -= (momentum[index] * alpha) / ( sqrt((<%= c_dtype %>)v[index]) + epsilon[0] );
20
20
  }
@@ -80,8 +80,10 @@ module TensorStream
80
80
 
81
81
  transpose_a = OpenCL::Int1.new(tensor.options[:transpose_a] ? 1 : 0)
82
82
  transpose_b = OpenCL::Int1.new(tensor.options[:transpose_b] ? 1 : 0)
83
- event_wait_list = build_event_wait_list(inputs)
83
+ event_wait_list = build_event_wait_list([a, b])
84
+
84
85
  output_buffer.op = _cl_program('gemm', dtype: dtype).send(:"gemm_#{dtype}", _opencl_queue, result_shape, cl_m, cl_n, cl_k, transpose_a, transpose_b, a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_wait_list)
86
+
85
87
  output_buffer
86
88
  end
87
89
 
@@ -3,15 +3,16 @@ module TensorStream
3
3
  class OpenCLBuffer < Buffer
4
4
  include ArrayOpsHelper
5
5
 
6
- attr_accessor :shape, :buffer, :cl_buffer, :op
6
+ attr_accessor :shape, :buffer, :cl_buffer, :op, :owner
7
7
 
8
- def initialize(data_type:, shape:, buffer:, cl_buffer:, op: nil, name: nil)
8
+ def initialize(owner, data_type:, shape:, buffer:, cl_buffer:, op: nil, name: nil)
9
9
  @data_type = data_type
10
10
  @shape = shape
11
11
  @buffer = buffer
12
12
  @cl_buffer = cl_buffer
13
13
  @name = name
14
14
  @op = op
15
+ @owner = owner
15
16
  end
16
17
 
17
18
  def total_elements
@@ -38,7 +38,8 @@ module TensorStream
38
38
  # OpenCL evaluator: runs tensor operations on OpenCL devices
39
39
  class OpenclEvaluator < BaseEvaluator
40
40
  attr_accessor :retain
41
- attr_reader :opencl_device
41
+ attr_reader :opencl_device, :opencl_context
42
+ attr_writer :context
42
43
 
43
44
  include TensorStream::OpHelper
44
45
  include TensorStream::ArrayOpsHelper
@@ -50,14 +51,14 @@ module TensorStream
50
51
 
51
52
  def initialize(session, device, thread_pool: nil, log_intermediates: false)
52
53
  super
53
- _create_opencl_context(device.native_device)
54
+ _create_opencl_context
54
55
  @opencl_device = device.native_device
55
56
  create_command_queue
56
57
  end
57
58
 
58
59
  def self.query_supported_devices
59
60
  devices = query_devices_with_score
60
- devices.sort { |a| a[1] }.reverse.map do |d|
61
+ devices.sort { |a, b| a[1] <=> b[1] }.map do |d|
61
62
  opencl_to_device(d)
62
63
  end
63
64
  end
@@ -68,16 +69,16 @@ module TensorStream
68
69
  opencl_to_device(platform_devices[[query[1].to_i, platform_devices.size - 1].min])
69
70
  end
70
71
 
71
- def self.opencl_to_device(d)
72
- device = d[0]
73
- index = d[3]
72
+ def self.opencl_to_device(dev)
73
+ device = dev[0]
74
+ index = dev[3]
74
75
  platform_name = device.platform.name.tr(' ', '_').downcase
75
76
  uri = [platform_name, index].join(':')
76
77
 
77
78
  device_type = device.type.to_s == 'GPU' ? :gpu : :cpu
78
79
 
79
- OpenclDevice.new(uri, device_type, self).tap do |devide|
80
- devide.native_device = device
80
+ OpenclDevice.new(uri, device_type, self).tap do |d|
81
+ d.native_device = device
81
82
  end
82
83
  end
83
84
 
@@ -85,14 +86,14 @@ module TensorStream
85
86
  # Select the best device available in the system for this evaluator
86
87
  def self.default_device
87
88
  devices = OpenclEvaluator.query_devices_with_score
88
- device = devices.sort { |a| a[1] }.reverse.first
89
+ device = devices.max { |a, b| a[1] <=> b[1] }
89
90
  opencl_to_device(device)
90
91
  end
91
92
 
92
93
  # opencl evaluator main entrypoint
93
94
  def run(tensor, execution_context)
94
- result = complete_eval(tensor, execution_context)
95
- # puts "wait finish"
95
+ result = complete_eval(tensor, execution_context)
96
+ # puts "-------------------wait finish------------------------"
96
97
  _opencl_queue.finish
97
98
  read_final_result(result)
98
99
  end
@@ -115,18 +116,22 @@ module TensorStream
115
116
  # buffer comes from non-opencl evaluator
116
117
  def convert_from_buffer(tensor, result)
117
118
  if result.buffer.is_a?(TensorStream::Evaluator::OutputGroup)
118
- converted_outputs = result.buffer.outputs.zip(result.buffer.data_types).map { |output, data_type| convert_to_opencl([output].flatten, shape_eval(output), data_type: data_type, name: tensor.name) }
119
+ converted_outputs = result.buffer.outputs.zip(result.buffer.data_types).map do |output, data_type|
120
+ convert_to_opencl([output].flatten, shape_eval(output), data_type: data_type, name: tensor.name)
121
+ end
119
122
  TensorStream::Evaluator::OutputGroup.new(converted_outputs, result.buffer.data_types)
120
123
  else
121
124
  convert_to_opencl([result.buffer].flatten, shape_eval(result.buffer), data_type: result.data_type, name: tensor.name)
122
125
  end
123
126
  end
124
127
 
128
+ # Generate OpenCL instruction to read back from GPU memory to Host memory for a tensor
125
129
  def enqueue_buffer_read(tensor, context)
126
130
  buffer = _run(tensor, context)
127
131
  if buffer.is_a?(Array)
128
132
  buffer.collect do |b|
129
133
  next b if b.buffer.size.zero?
134
+
130
135
  b.op = _opencl_queue.enqueue_read_buffer(b.cl_buffer, b.buffer, event_wait_list: build_event_wait_list([b]))
131
136
  b
132
137
  end
@@ -135,6 +140,7 @@ module TensorStream
135
140
  return buffer if buffer.nil?
136
141
  return [] if buffer.buffer.nil?
137
142
  return buffer if buffer.buffer.size.zero?
143
+
138
144
  buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer]))
139
145
  buffer
140
146
  end
@@ -145,7 +151,7 @@ module TensorStream
145
151
 
146
152
  buffer = enqueue_buffer_read(tensor, context)
147
153
  events = build_event_wait_list([buffer])
148
- # puts "wait #{tensor.name}"
154
+ # puts "** wait #{tensor.name} **"
149
155
  OpenCL.wait_for_events(events) unless events.empty?
150
156
  buffer
151
157
  end
@@ -154,6 +160,7 @@ module TensorStream
154
160
  OpenCL.platforms.flat_map do |p|
155
161
  p.devices.select { |d| d.available > 0 }.each_with_index.collect do |d, index|
156
162
  score = 0
163
+
157
164
  if d.type.to_s == 'CPU'
158
165
  score += 1
159
166
  elsif d.type.to_s == 'GPU'
@@ -162,8 +169,7 @@ module TensorStream
162
169
 
163
170
  score += 1000 if d.platform.name == 'NVIDIA CUDA'
164
171
 
165
- score += d.max_compute_units
166
- score += d.max_clock_frequency
172
+ score += d.max_compute_units * d.max_clock_frequency
167
173
 
168
174
  [d, score, p.name, index]
169
175
  end
@@ -172,6 +178,31 @@ module TensorStream
172
178
 
173
179
  protected
174
180
 
181
+ ##
182
+ # called when passing control to another evaluator
183
+ def perform_transition(tensor, input, next_evaluator, execution_context)
184
+ if next_evaluator.is_a?(OpenclEvaluator) # OpenCL but different device?
185
+ # create opencl buffer for this tensor
186
+ next_evaluator.context = @context
187
+
188
+ foreign_buffer = next_evaluator._run(input, execution_context)
189
+ event_list = build_event_wait_list([foreign_buffer])
190
+
191
+ output_buffer = _create_result_buffer(input.data_type, foreign_buffer.shape, "t_#{tensor.name}_#{input.name}")
192
+ output_buffer.op = if next_evaluator.opencl_context == @opencl_context
193
+ _opencl_queue.enqueue_copy_buffer(foreign_buffer.cl_buffer, output_buffer.cl_buffer, event_wait_list: event_list)
194
+ else
195
+ puts "wait finish transition ** #{input.name} **"
196
+ read_event = next_evaluator._opencl_queue.enqueue_read_buffer(foreign_buffer.cl_buffer, output_buffer.buffer, event_wait_list: event_list)
197
+ OpenCL.wait_for_events(read_event)
198
+ _opencl_queue.enqueue_write_buffer(output_buffer.cl_buffer, output_buffer.buffer)
199
+ end
200
+ output_buffer
201
+ else
202
+ super
203
+ end
204
+ end
205
+
175
206
  def prepare_input(tensor, context, options = {})
176
207
  return nil unless tensor
177
208
 
@@ -195,8 +226,19 @@ module TensorStream
195
226
  buffer.to_ruby
196
227
  end
197
228
 
198
- def _create_opencl_context(opencl_device)
199
- @opencl_context = OpenCL.create_context(opencl_device)
229
+ def _create_opencl_context(device = nil)
230
+ if device.nil?
231
+ @@global_opencl_context ||= begin
232
+ all_devices = OpenclEvaluator.query_supported_devices.map(&:native_device)
233
+ puts "global context created for #{all_devices}"
234
+ OpenCL.create_context(all_devices)
235
+ end
236
+
237
+ @opencl_context = @@global_opencl_context
238
+ else
239
+ puts "context created for #{device.native_device}"
240
+ @opencl_context = OpenCL.create_context(device.native_device)
241
+ end
200
242
  end
201
243
 
202
244
  def create_command_queue
@@ -205,6 +247,7 @@ module TensorStream
205
247
  properties = []
206
248
  properties << OpenCL::CommandQueue::PROFILING_ENABLE if supported_proprties.include?('PROFILING_ENABLE')
207
249
  properties << OpenCL::CommandQueue::OUT_OF_ORDER_EXEC_MODE_ENABLE if supported_proprties.include?('OUT_OF_ORDER_EXEC_MODE_ENABLE')
250
+ # puts "creating queue with properties #{supported_proprties}"
208
251
  @command_queue = _opencl_context.create_command_queue(opencl_device, properties: properties)
209
252
  end
210
253
 
@@ -222,28 +265,32 @@ module TensorStream
222
265
 
223
266
  def _cl_program(kernel, args = {})
224
267
  suffix = args.collect { |k, v| "#{k}.#{escape_arg_content(v)}" }.join('.')
225
- @context[:_cache]["_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"] ||= begin
226
- file_path = File.join('/tmp', "#{kernel}.#{suffix}.cl")
227
- source = if File.exist?(file_path) && ENV['TS_OPENCL_FILE_CACHE']
228
- File.read(file_path)
229
- else
230
- filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
231
- raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
232
- source = File.read(filename)
233
- source = OpenclTemplateHelper.new(source).generate(args)
234
- File.write(file_path, source) if ENV['TS_OPENCL_FILE_CACHE']
235
- source
236
- end
237
- program = _opencl_context.create_program_with_source(source)
238
- program.build
239
- rescue OpenCL::Error::BUILD_PROGRAM_FAILURE => e
240
- puts "OpenCL Compile error: #{program.build_log}"
241
- raise e
242
- end
268
+ kernel_cache_key = "_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"
269
+ @context[:_cache][kernel_cache_key] ||=
270
+ begin
271
+ # puts "building #{kernel_cache_key}"
272
+ file_path = File.join('/tmp', "#{kernel}.#{suffix}.cl")
273
+ source = if File.exist?(file_path) && ENV['TS_OPENCL_FILE_CACHE']
274
+ File.read(file_path)
275
+ else
276
+ filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
277
+ raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
278
+
279
+ source = File.read(filename)
280
+ source = OpenclTemplateHelper.new(source).generate(args)
281
+ File.write(file_path, source) if ENV['TS_OPENCL_FILE_CACHE']
282
+ source
283
+ end
284
+ program = _opencl_context.create_program_with_source(source)
285
+ program.build
286
+ rescue OpenCL::Error::BUILD_PROGRAM_FAILURE => e
287
+ puts "OpenCL Compile error: #{program.build_log}"
288
+ raise e
289
+ end
243
290
  end
244
291
 
245
292
  def escape_arg_content(value)
246
- return value.tr(' ','_') if value.is_a?(String)
293
+ return value.tr(' ', '_') if value.is_a?(String)
247
294
  return value.join('-') if value.is_a?(Array)
248
295
 
249
296
  value
@@ -257,9 +304,8 @@ module TensorStream
257
304
 
258
305
  child_context = execution_context.dup
259
306
  res = if tensor.is_a?(Operation)
260
- if !self.class.ops.include?(tensor.operation.to_sym)
261
- result = @session.delegate_to_evaluator(tensor, @context, execution_context)
262
- convert_from_buffer(tensor, result)
307
+ if !on_same_device?(tensor) # tensor is on another device or evaluator
308
+ perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
263
309
  else
264
310
  eval_operation(tensor, child_context)
265
311
  end
@@ -295,7 +341,7 @@ module TensorStream
295
341
 
296
342
  register_op :identity do |context, tensor, inputs|
297
343
  value = inputs[0]
298
- buffer = OpenCLBuffer.new(name: tensor.name, data_type: tensor.data_type, shape: value.shape, buffer: value.buffer, cl_buffer: value.cl_buffer)
344
+ buffer = OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type, shape: value.shape, buffer: value.buffer, cl_buffer: value.cl_buffer)
299
345
  buffer.op = build_event_wait_list(inputs)
300
346
  buffer
301
347
  end
@@ -375,6 +421,7 @@ module TensorStream
375
421
 
376
422
  register_op :flow_group do |_context, _tensor, inputs|
377
423
  events = build_event_wait_list(inputs)
424
+ # puts "** wait for event flow_group**"
378
425
  OpenCL.wait_for_events(events) unless events.empty?
379
426
  nil
380
427
  end
@@ -387,8 +434,10 @@ module TensorStream
387
434
  cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
388
435
  return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
389
436
  return @context[cache_key] if @context.key?(cache_key)
390
- # puts "opencl: #{tensor.name}"
437
+
438
+ # puts "opencl eval #{object_id} #{tensor.name}"
391
439
  invoke(tensor, child_context).tap do |result|
440
+ # puts "result done opencl #{object_id}: #{tensor.name}"
392
441
  if tensor.breakpoint
393
442
  a = resolve_placeholder(tensor.inputs[0], child_context) if tensor.inputs && tensor.inputs[0]
394
443
  b = resolve_placeholder(tensor.inputs[1], child_context) if tensor.inputs && tensor.inputs[1]
@@ -603,6 +652,7 @@ module TensorStream
603
652
  end
604
653
 
605
654
  def convert_to_opencl(value, shape, data_type: nil, name: nil)
655
+ # puts "convert_to_opencl called for #{name}"
606
656
  value = [value] if !value.is_a?(Array) && !value.is_a?(NArray)
607
657
 
608
658
  cache_key = "_cl_object_#{name}:#{shape.join('_')}:#{object_id}"
@@ -630,7 +680,7 @@ module TensorStream
630
680
  _opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
631
681
  end
632
682
 
633
- @context[:_cache][cache_key] = OpenCLBuffer.new(name: name, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
683
+ @context[:_cache][cache_key] = OpenCLBuffer.new(self, name: name, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
634
684
  end
635
685
  if data_type == :string
636
686
  value[0].each_byte.with_index do |c, index|
@@ -664,15 +714,15 @@ module TensorStream
664
714
 
665
715
  def allocate_narray_for_type(data_type, narray_size)
666
716
  case data_type
667
- when :float, :float32
717
+ when :float, :float32, :float16
668
718
  NArray.sfloat(narray_size)
669
719
  when :float64
670
720
  NArray.float(narray_size)
671
- when :int, :int32, :int64
721
+ when :int, :int32, :int64, :uint64, :uint32 #NArray does not have 64 bit int types
672
722
  NArray.int(narray_size)
673
- when :int16
723
+ when :int16, :uint16
674
724
  NArray.sint(narray_size)
675
- when :uint8
725
+ when :uint8, :int8
676
726
  NArray.byte(narray_size)
677
727
  when :boolean
678
728
  NArray.byte(narray_size)
@@ -686,12 +736,14 @@ module TensorStream
686
736
  end
687
737
 
688
738
  def _create_result_buffer(data_type, shape, name)
689
- return OpenCLBuffer.new(name: name, data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
690
- @context[:_cache][:_cl_buffers]["_result_#{name}_#{shape.join('_')}:#{object_id}"] ||= begin
739
+ return OpenCLBuffer.new(self, name: name, data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
740
+ cache_key = "_result_#{name}_#{shape.join('_')}:#{object_id}"
741
+ @context[:_cache][:_cl_buffers][cache_key] ||= begin
742
+ # puts "create result buffer #{cache_key}"
691
743
  size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
692
744
  buffer = allocate_narray_for_type(data_type, size)
693
745
  cl_buffer = _opencl_context.create_buffer(buffer.size * buffer.element_size)
694
- OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
746
+ OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
695
747
  end
696
748
  end
697
749
 
@@ -706,7 +758,7 @@ module TensorStream
706
758
  start = index * buffer.size * buffer.element_size
707
759
  region = OpenCL::BufferRegion::new(start, buffer.size * buffer.element_size)
708
760
  cl_buffer = parent_buffer.cl_buffer.create_sub_buffer(OpenCL::BUFFER_CREATE_TYPE_REGION, region)
709
- OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
761
+ OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: name)
710
762
  else
711
763
  _create_result_buffer(tensor.data_type, shape, name)
712
764
  end
@@ -728,7 +780,7 @@ module TensorStream
728
780
 
729
781
  # create sub buffers of different sizes
730
782
  def _create_variable_result_sub_buffer(parent_buffer, index, start, region_size_in_bytes, data_type, shape, name)
731
- cache_key ="_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
783
+ cache_key = "_sub_result_#{parent_buffer.object_id}_#{name}_#{index}:#{object_id}"
732
784
  @context[:_cache][:_cl_buffers][cache_key] ||= begin
733
785
  size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
734
786
  buffer = allocate_narray_for_type(data_type, size)
@@ -736,7 +788,7 @@ module TensorStream
736
788
  if parent_buffer.cl_buffer.associated_memobject.nil?
737
789
  region = OpenCL::BufferRegion::new(start, region_size_in_bytes)
738
790
  cl_buffer = parent_buffer.cl_buffer.create_sub_buffer(OpenCL::BUFFER_CREATE_TYPE_REGION, region)
739
- OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: "#{name}/sub")
791
+ OpenCLBuffer.new(self, data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer, name: "#{name}/sub")
740
792
  else
741
793
  _create_result_buffer(tensor.data_type, shape, name)
742
794
  end
@@ -806,6 +858,7 @@ module TensorStream
806
858
  convert_to_opencl(red, [], data_type: tensor.data_type, name: tensor.name)
807
859
  else
808
860
  return input if input.shape.empty?
861
+
809
862
  value = input.buffer.reshape(*input.shape.reverse)
810
863
  rank = input.shape.size - 1
811
864
 
@@ -862,17 +915,15 @@ module TensorStream
862
915
 
863
916
  def resolve_placeholder(placeholder, _execution_context = {})
864
917
  return nil if placeholder.nil?
918
+ return placeholder unless placeholder.is_a?(Placeholder)
865
919
 
866
- var = if placeholder.is_a?(Placeholder)
867
- @context[placeholder.name.to_sym].tap do |c|
868
- raise "missing placeholder #{placeholder.name}" if c.nil?
869
- end
870
- else
871
- placeholder
872
- end
920
+ var = @context[placeholder.name.to_sym]
921
+ raise "missing placeholder #{placeholder.name}" if var.nil?
873
922
 
874
- return convert_to_opencl(var, shape_eval(var), data_type: placeholder.data_type, name: placeholder.name) unless var.is_a?(Tensor)
875
- Tensor.cast_dtype(var, placeholder.data_type)
923
+ cache_key = "#{placeholder.graph.object_id}_opencl_#{placeholder.name}_p:#{object_id}"
924
+ @context[cache_key] ||= begin
925
+ convert_to_opencl(var, shape_eval(var), data_type: placeholder.data_type, name: placeholder.name) unless var.is_a?(Tensor)
926
+ end
876
927
  end
877
928
 
878
929
  def all_true?(arr)
@@ -32,10 +32,18 @@ class OpenclTemplateHelper
32
32
  case dtype.to_s
33
33
  when 'float64'
34
34
  'double'
35
- when 'float32', 'float'
35
+ when 'float32', 'float', 'float16'
36
36
  'float'
37
+ when 'uint32'
38
+ 'uint'
39
+ when 'int64'
40
+ 'int' # 'long' - NArray does not support 64bit int types
41
+ when 'uint64'
42
+ 'uint' # 'ulong' - NArray does not support 64bit int types
37
43
  when 'int32', 'int'
38
44
  'int'
45
+ when 'uint16'
46
+ 'ushort'
39
47
  when 'int16'
40
48
  'short'
41
49
  when 'uint8'
@@ -51,10 +59,12 @@ class OpenclTemplateHelper
51
59
  case dtype.to_s
52
60
  when 'float64'
53
61
  'DBL_MIN'
54
- when 'float32', 'float'
62
+ when 'float32', 'float', 'float16'
55
63
  'FLT_MIN'
56
64
  when 'int32', 'int'
57
65
  'INT_MIN'
66
+ when 'uint32', 'uint16'
67
+ '0'
58
68
  when 'int16'
59
69
  'SHRT_MIN'
60
70
  when 'int8'
@@ -1,5 +1,5 @@
1
1
  module TensorStream
2
2
  module Opencl
3
- VERSION = "0.2.1"
3
+ VERSION = "0.2.2"
4
4
  end
5
5
  end
data/samples/iris.rb CHANGED
@@ -48,8 +48,6 @@ x_test.each_with_index do |x, index|
48
48
  validation_cases << [x, y_test[index]]
49
49
  end
50
50
 
51
-
52
-
53
51
  def init_weights(shape)
54
52
  # Weight initialization
55
53
  weights = TensorStream.random_normal(shape, stddev: 0.1)
@@ -0,0 +1,99 @@
1
+ # A ruby port of the example code discussed by Martin Gorner in
2
+ # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)""
3
+ #
4
+ # https://www.youtube.com/watch?v=u4alGiomYP4
5
+ #
6
+ # Requirements:
7
+ # mnist-learn gem
8
+ # opencl_ruby_ffi gem
9
+ require "bundler/setup"
10
+ require 'tensor_stream'
11
+ require 'mnist-learn'
12
+ require 'pry-byebug'
13
+
14
+ # Enable OpenCL hardware-accelerated computation; running without OpenCL can be very slow
15
+ require 'tensor_stream/opencl'
16
+
17
+ tf = TensorStream
18
+
19
+ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Opencl::VERSION}"
20
+ tf.set_random_seed(0)
21
+
22
+ # Import MNIST data
23
+ puts "downloading minst data"
24
+ mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
25
+ puts "downloading finished"
26
+
27
+ x = tf.placeholder(:float32, shape: [nil, 784])
28
+
29
+ K = 200
30
+ L = 100
31
+ M = 60
32
+ N = 30
33
+
34
+
35
+ w1 = tf.variable(tf.random_normal([784, K]))
36
+ b1 = tf.variable(tf.zeros([K]))
37
+
38
+ w2 = tf.variable(tf.random_normal([K, L]))
39
+ b2 = tf.variable(tf.zeros([L]))
40
+
41
+ w3 = tf.variable(tf.random_normal([L, M]))
42
+ b3 = tf.variable(tf.zeros([M]))
43
+
44
+ w4 = tf.variable(tf.random_normal([M, N]))
45
+ b4 = tf.variable(tf.zeros([N]))
46
+
47
+ w5 = tf.variable(tf.random_normal([N, 10]))
48
+ b5 = tf.variable(tf.zeros([10]))
49
+
50
+ x_ = tf.reshape(x, [-1, 784])
51
+
52
+ y1 = tf.sigmoid(tf.matmul(x_, w1) + b1)
53
+ y2 = tf.sigmoid(tf.matmul(y1, w2) + b2)
54
+ y3 = tf.sigmoid(tf.matmul(y2, w3) + b3)
55
+ y4 = tf.sigmoid(tf.matmul(y3, w4) + b4)
56
+ ylogits = tf.matmul(y4, w5) + b5
57
+
58
+ # model
59
+ y = tf.nn.softmax(ylogits)
60
+
61
+ y_ = tf.placeholder(:float32, shape: [nil, 10])
62
+
63
+ # cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
64
+ # TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
65
+ # problems with log(0) which is NaN
66
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits: ylogits, labels: y_)
67
+ cross_entropy = tf.reduce_mean(cross_entropy)*100
68
+
69
+ is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
70
+ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
71
+
72
+ # training step, learning rate = 0.003
73
+ learning_rate = 0.003
74
+ train_step = TensorStream::Train::AdamOptimizer.new(learning_rate).minimize(cross_entropy)
75
+
76
+ sess = tf.session
77
+ init = tf.global_variables_initializer
78
+ sess.run(init)
79
+
80
+ mnist_train = mnist.train
81
+ test_data = { x => mnist.test.images, y_ => mnist.test.labels }
82
+
83
+ (0..10000).each do |i|
84
+ # load batch of images and correct answers
85
+ batch_x, batch_y = mnist_train.next_batch(100)
86
+ train_data = { x => batch_x, y_ => batch_y }
87
+
88
+ # train
89
+ sess.run(train_step, feed_dict: train_data)
90
+ if (i % 50 == 0)
91
+ # success? add code to print it
92
+ a_train, c_train = sess.run([accuracy, cross_entropy], feed_dict: train_data)
93
+
94
+ # success on test data?
95
+ a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data)
96
+ puts "#{i} train accuracy #{a_train}, error #{c_train} test accuracy #{a_test}, error #{c_test}"
97
+ end
98
+ end
99
+
@@ -0,0 +1,98 @@
1
+ # A ruby port of the example code discussed by Martin Gorner in
2
+ # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)"
3
+ #
4
+ # https://www.youtube.com/watch?v=u4alGiomYP4
5
+ #
6
+ # Requirements:
7
+ # mnist-learn gem
8
+ # opencl_ruby_ffi gem
9
+ require "bundler/setup"
10
+ require 'tensor_stream'
11
+ require 'mnist-learn'
12
+ require 'pry-byebug'
13
+
14
+ # Enable OpenCL hardware-accelerated computation; running without OpenCL can be very slow
15
+ require 'tensor_stream/opencl'
16
+
17
+ tf = TensorStream
18
+
19
+ # Import MNIST data
20
+ puts "downloading minst data"
21
+ mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
22
+ puts "downloading finished"
23
+
24
+ x = tf.placeholder(:float32, shape: [nil, 784])
25
+
26
+ K = 200
27
+ L = 100
28
+ M = 60
29
+ N = 30
30
+
31
+
32
+ w1 = tf.variable(tf.random_normal([784, K]))
33
+ b1 = tf.variable(tf.zeros([K]))
34
+
35
+ w2 = tf.variable(tf.random_normal([K, L]))
36
+ b2 = tf.variable(tf.zeros([L]))
37
+
38
+ w3 = tf.variable(tf.random_normal([L, M]))
39
+ b3 = tf.variable(tf.zeros([M]))
40
+
41
+ w4 = tf.variable(tf.random_normal([M, N]))
42
+ b4 = tf.variable(tf.zeros([N]))
43
+
44
+ w5 = tf.variable(tf.random_normal([N, 10]))
45
+ b5 = tf.variable(tf.zeros([10]))
46
+
47
+ x_ = tf.reshape(x, [-1, 784])
48
+
49
+ y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
50
+ y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
51
+ y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
52
+ y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
53
+ ylogits = tf.matmul(y4, w5) + b5
54
+
55
+ # model
56
+ y = tf.nn.softmax(ylogits)
57
+
58
+ y_ = tf.placeholder(:float32, shape: [nil, 10])
59
+
60
+ # cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
61
+ # TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical stability
62
+ # problems with log(0), which is -Infinity and turns into NaN once multiplied by a zero label
63
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits: ylogits, labels: y_)
64
+ cross_entropy = tf.reduce_mean(cross_entropy)*100
65
+
66
+ is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
67
+ accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
68
+
69
+ # training step, learning rate = 0.003
70
+ learning_rate = 0.003
71
+ train_step = TensorStream::Train::AdamOptimizer.new(learning_rate).minimize(cross_entropy)
72
+
73
+ sess = tf.session
74
+ # Add ops to save and restore all the variables.
75
+ saver = tf::Train::Saver.new
76
+ init = tf.global_variables_initializer
77
+
78
+ sess.run(init)
79
+ mnist_train = mnist.train
80
+ test_data = { x => mnist.test.images, y_ => mnist.test.labels }
81
+
82
+ (0..1000).each do |i|
83
+ # load batch of images and correct answers
84
+ batch_x, batch_y = mnist_train.next_batch(100)
85
+ train_data = { x => batch_x, y_ => batch_y }
86
+
87
+ # train
88
+ sess.run(train_step, feed_dict: train_data)
89
+ if (i % 50 == 0)
90
+ # every 50 iterations, measure accuracy and loss on the current training batch
91
+ a_train, c_train = sess.run([accuracy, cross_entropy], feed_dict: train_data)
92
+
93
+ # success on test data?
94
+ a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data)
95
+ puts "#{i} train accuracy #{a_train}, error #{c_train} test accuracy #{a_test}, error #{c_test}"
96
+ end
97
+ end
98
+
data/samples/multigpu.rb CHANGED
@@ -11,7 +11,6 @@ DIMEN = 1024
11
11
  A = ts.random_uniform([DIMEN, DIMEN]).eval
12
12
  B = ts.random_uniform([DIMEN, DIMEN]).eval
13
13
 
14
-
15
14
  # Create a graph to store results
16
15
  c1 = []
17
16
  c2 = []
@@ -35,17 +34,24 @@ sum = ts.device('/device:GPU:0') do
35
34
  ts.add_n(c1)
36
35
  end
37
36
 
38
- t1_1 = Time.now.to_i
37
+ t1_1 = nil
39
38
  t2_1 = nil
40
-
41
- ts.session(log_device_placement: true) do |sess|
39
+ puts "===================== starting single GPU test ================"
40
+ ts.session(log_device_placement: true, profile_enabled: true) do |sess|
41
+ puts "-- warmup ---"
42
+ sess.run(sum, feed_dict: { a => A, b => B}) # warmup
43
+ puts "-- warmup ---"
44
+ time = Time.now
45
+ t1_1 = time.to_i * (10 ** 9) + time.nsec
42
46
  sess.run(sum, feed_dict: { a => A, b => B})
43
- t2_1 = Time.now.to_i
47
+ time = Time.now
48
+ t2_1 = time.to_i * (10 ** 9) + time.nsec
44
49
  end
45
-
50
+ puts "===================== end single GPU test ================"
51
+ puts "===================== MULTI GPU text ================"
46
52
  # Multi GPU computing
47
53
  # GPU:0 computes A^n
48
- ts.device('/device:GPU:1') do
54
+ ts.device('/device:GPU:0') do
49
55
  a = ts.placeholder(:float32, shape: [DIMEN, DIMEN])
50
56
  c2 << matpow(a, n)
51
57
  end
@@ -56,18 +62,26 @@ ts.device('/device:GPU:1') do
56
62
  c2 << matpow(b, n)
57
63
  end
58
64
 
59
- ts.device('/device:GPU:1') do
65
+ ts.device('/device:GPU:0') do
60
66
  sum = ts.add_n(c2) #Addition of all elements in c2, i.e. A^n + B^n
61
67
  end
62
68
 
63
- t1_2 = Time.now.to_i
69
+ t1_2 = nil
64
70
  t2_2 = nil
65
- ts.session(log_device_placement:true) do |sess|
71
+
72
+ ts.session(log_device_placement: true, profile_enabled: true) do |sess|
66
73
  # Run the op.
74
+ puts "-- warmup ---"
75
+ sess.run(sum, feed_dict: {a => A, b => B}) # warm up
76
+ puts "-- warmup ---"
77
+ time = Time.now
78
+ t1_2 = time.to_i * (10 ** 9) + time.nsec
79
+ puts "================ starting multiGPU test ==============="
67
80
  sess.run(sum, feed_dict: {a => A, b => B})
68
- t2_2 = Time.now.to_i
81
+ time = Time.now
82
+ t2_2 = time.to_i * (10 ** 9) + time.nsec
69
83
  end
70
84
 
71
85
 
72
- print("Single GPU computation time: " + (t2_1-t1_1).to_s)
73
- print("Multi GPU computation time: " + (t2_2-t1_2).to_s)
86
+ puts("Single GPU computation time: " + ((t2_1-t1_1)/ 1000000.to_f).to_s)
87
+ puts("Multi GPU computation time: " + ((t2_2-t1_2)/ 1000000.to_f).to_s)
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
38
38
  spec.add_development_dependency "pry-byebug"
39
39
  spec.add_development_dependency "awesome_print"
40
40
  spec.add_development_dependency "mnist-learn"
41
- spec.add_dependency "tensor_stream", "~> 0.9.0"
41
+ spec.add_dependency "tensor_stream", "~> 0.9.2"
42
42
  spec.add_dependency "opencl_ruby_ffi"
43
43
  spec.add_dependency "oily_png"
44
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tensor_stream-opencl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Dayo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-08 00:00:00.000000000 Z
11
+ date: 2018-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 0.9.0
103
+ version: 0.9.2
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 0.9.0
110
+ version: 0.9.2
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: opencl_ruby_ffi
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -145,10 +145,10 @@ extra_rdoc_files: []
145
145
  files:
146
146
  - ".gitignore"
147
147
  - ".rspec"
148
+ - ".rubocop.yml"
148
149
  - ".travis.yml"
149
150
  - CODE_OF_CONDUCT.md
150
151
  - Gemfile
151
- - Gemfile.lock
152
152
  - LICENSE.txt
153
153
  - README.md
154
154
  - Rakefile
@@ -226,7 +226,8 @@ files:
226
226
  - lib/tensor_stream/opencl/version.rb
227
227
  - samples/iris.data
228
228
  - samples/iris.rb
229
- - samples/mnist_data.rb
229
+ - samples/mnist_data_2.1.rb
230
+ - samples/mnist_data_2.2.rb
230
231
  - samples/multigpu.rb
231
232
  - samples/nearest_neighbor.rb
232
233
  - samples/rnn.rb
data/Gemfile.lock DELETED
@@ -1,70 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- tensor_stream-opencl (0.2.1)
5
- oily_png
6
- opencl_ruby_ffi
7
- tensor_stream (~> 0.9.0)
8
-
9
- GEM
10
- remote: https://rubygems.org/
11
- specs:
12
- awesome_print (1.8.0)
13
- byebug (10.0.2)
14
- chunky_png (1.3.10)
15
- coderay (1.1.2)
16
- concurrent-ruby (1.0.5)
17
- deep_merge (1.2.1)
18
- diff-lcs (1.3)
19
- ffi (1.9.25)
20
- method_source (0.9.0)
21
- mnist-learn (0.1.1)
22
- narray (0.6.1.2)
23
- narray_ffi (1.4.4)
24
- ffi (~> 1.9, >= 1.9.3)
25
- narray (~> 0.6, >= 0.6.0.8)
26
- oily_png (1.2.1)
27
- chunky_png (~> 1.3.7)
28
- opencl_ruby_ffi (1.3.4)
29
- ffi (~> 1.9, >= 1.9.3)
30
- narray (~> 0.6, >= 0.6.0.8)
31
- narray_ffi (~> 1.0, >= 1.0.0)
32
- pry (0.11.3)
33
- coderay (~> 1.1.0)
34
- method_source (~> 0.9.0)
35
- pry-byebug (3.6.0)
36
- byebug (~> 10.0)
37
- pry (~> 0.10)
38
- rake (10.5.0)
39
- rspec (3.8.0)
40
- rspec-core (~> 3.8.0)
41
- rspec-expectations (~> 3.8.0)
42
- rspec-mocks (~> 3.8.0)
43
- rspec-core (3.8.0)
44
- rspec-support (~> 3.8.0)
45
- rspec-expectations (3.8.1)
46
- diff-lcs (>= 1.2.0, < 2.0)
47
- rspec-support (~> 3.8.0)
48
- rspec-mocks (3.8.0)
49
- diff-lcs (>= 1.2.0, < 2.0)
50
- rspec-support (~> 3.8.0)
51
- rspec-support (3.8.0)
52
- tensor_stream (0.9.0)
53
- chunky_png
54
- concurrent-ruby
55
- deep_merge
56
-
57
- PLATFORMS
58
- ruby
59
-
60
- DEPENDENCIES
61
- awesome_print
62
- bundler (~> 1.16)
63
- mnist-learn
64
- pry-byebug
65
- rake (~> 10.0)
66
- rspec (~> 3.0)
67
- tensor_stream-opencl!
68
-
69
- BUNDLED WITH
70
- 1.16.2
@@ -1,65 +0,0 @@
1
- # A ruby port of the example code discussed by Martin Gorner in
2
- # "TensorFlow and Deep Learning without a PhD, Part 1 (Google Cloud Next '17)""
3
- #
4
- # https://www.youtube.com/watch?v=u4alGiomYP4
5
- #
6
- # Requirements:
7
- # mnist-learn gem
8
- # opencl_ruby_ffi gem
9
- require "bundler/setup"
10
- require 'tensor_stream'
11
- require 'mnist-learn'
12
-
13
- # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
14
- # require 'tensor_stream/opencl'
15
-
16
- tf = TensorStream
17
-
18
- # Import MNIST data
19
- puts "downloading minst data"
20
- mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
21
- puts "downloading finished"
22
-
23
- x = tf.placeholder(:float32, shape: [nil, 784])
24
- w = tf.variable(tf.zeros([784, 10]))
25
- b = tf.variable(tf.zeros([10]))
26
-
27
-
28
-
29
- # model
30
- y = tf.nn.softmax(tf.matmul(tf.reshape(x, [-1, 784]), w) + b)
31
-
32
- y_ = tf.placeholder(:float32, shape: [nil, 10])
33
-
34
- # loss function
35
- cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
36
-
37
- is_correct = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
38
- accuracy = tf.reduce_mean(tf.cast(is_correct, :float32))
39
-
40
- optimizer = TensorStream::Train::AdamOptimizer.new
41
- train_step = optimizer.minimize(cross_entropy)
42
-
43
- sess = tf.session
44
- init = tf.global_variables_initializer
45
- sess.run(init)
46
-
47
- (0...1000).each do |i|
48
- # load batch of images and correct answers
49
- batch_x, batch_y = mnist.train.next_batch(100)
50
- train_data = { x => batch_x, y_ => batch_y }
51
-
52
- # train
53
- sess.run(train_step, feed_dict: train_data)
54
- if (i % 10 == 0)
55
- # success? add code to print it
56
- a, c = sess.run([accuracy, cross_entropy], feed_dict: train_data)
57
- puts "#{i} train accuracy #{a}, error #{c}"
58
-
59
- # success on test data?
60
- test_data = { x => mnist.test.images, y_ => mnist.test.labels }
61
- a, c = sess.run([accuracy, cross_entropy], feed_dict: test_data)
62
- puts " test accuracy #{a}, error #{c}"
63
- end
64
- end
65
-