RubyGems - tensor_stream-opencl - Versions diffs - 0.2.4 → 0.2.5 - Mend

tensor_stream-opencl 0.2.4 → 0.2.5

Files changed (16) hide show

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.rubocop.yml +6 -1
data/lib/tensor_stream/opencl/array_ops.rb +58 -55
data/lib/tensor_stream/opencl/nn_ops.rb +57 -56
data/lib/tensor_stream/opencl/opencl_buffer.rb +11 -6
data/lib/tensor_stream/opencl/opencl_evaluator.rb +49 -36
data/lib/tensor_stream/opencl/version.rb +1 -1
data/lib/tensor_stream/opencl.rb +1 -0
data/samples/classify.rb +21 -0
data/samples/dump_mnist.rb +21 -0
data/samples/image_sort.rb +9 -0
data/samples/mnist_data_2.3.rb +4 -12
data/samples/mnist_data_3.0.rb +16 -8
data/tensor_stream-opencl.gemspec +1 -1
metadata +9 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3e4aa123289372c651cd4da3e7c206abc4f9f67a551d4062180c5cf6555dc243
-  data.tar.gz: 6517954207c85f56cd08b2892b0119d4bb7a35e2d4bd9b9cacc5d3c9ccfb9e42
+  metadata.gz: c0e8de1676b30c21f9529cdce9d3fee406cdf2945d54f890ae49c14c1329860e
+  data.tar.gz: 66932db63589eedcd6247083a27344bed8c80860b6d096d01fb5b46db1b53521
 SHA512:
-  metadata.gz: 7f61d61be79dd1e06ebfdc77ed2dff9e717e0cdb292160fe20c9ca08693d867e1b0e0350c71db5d24feb4671a26e793f44d6b80762c384193c1985b6b1616376
-  data.tar.gz: 72c32530717fac8ff947ce4b204535755134bde14e0f70d0d120ff101b5654843312186317cb480fd5e1c620a25328a3590b1f35193faf1d196e7ad631d169b0
+  metadata.gz: 5b905243d98976c94cb58dd443fc25f0b1e78bba689f20677ee3457dc44641d228642eb65adc4e58227d1feb0686fe29a3cabd8b25910f8e9c1aa0ef575ae8ff
+  data.tar.gz: 03ad5c5cd27ff058df206de8e109699fe0ef492e8ced1b97217cc0fe1ab7a4e43da5db45fed85b2e96b7bb35dc14465e39695b7995b3a1ccfcece7c050e0cae3

data/.gitignore CHANGED Viewed

@@ -1,6 +1,7 @@
 /.bundle/
 /.yardoc
 /_yardoc/
+/test_images/
 /coverage/
 /doc/
 /pkg/
@@ -10,6 +11,8 @@ Gemfile.lock
 *.gem
 *.ckpt
 profile.json
+profile.csv
+/test_models/
 # rspec failure tracking
 .rspec_status

data/.rubocop.yml CHANGED Viewed

@@ -34,6 +34,7 @@ Metrics/CyclomaticComplexity:
 Metrics/BlockLength:
   Exclude:
     - lib/tensor_stream/math_gradients.rb
+    - benchmark/benchmark.rb
 Naming/AccessorMethodName:
   Exclude:
@@ -86,4 +87,8 @@ Style/TrailingCommaInHashLiteral:
 Naming/UncommunicativeMethodParamName:
   Exclude:
     - lib/tensor_stream/evaluator/ruby_evaluator.rb
-    - lib/tensor_stream/ops.rb
+    - lib/tensor_stream/ops.rb
+Style/BlockDelimiters:
+  Exclude:
+    - benchmark/benchmark.rb

data/lib/tensor_stream/opencl/array_ops.rb CHANGED Viewed

@@ -103,26 +103,27 @@ module TensorStream
                         end
                       else
                         raise TensorStream::ValueError, "#{num_split} does not divide #{value_shape[axis]} evenly" if num_split.reduce(:+) != value_shape[axis]
                         # compute shapes of individual output buffers
                         new_shapes = num_split.each_with_index.collect do |num, index|
                                        new_shape = value_shape.dup
                                        new_shape[axis] = num
                                        new_shape
                                      end
+                        out = []
                         if axis.zero? # axis zero fast copy path
                           start = 0
-                          out = []
-                          new_shapes.each_with_index do |new_shape, index|
-                            element_count = new_shape.reduce(:*) || 1
+                          new_shapes.each_with_index do |ns, index|
+                            element_count = ns.reduce(:*) || 1
                             region_size_in_bytes = element_count * value.buffer.element_size
-                            out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
+                            out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{ns.join('.')}")
                             start += region_size_in_bytes
                           end
-                          out
                         else
                           # create buffers for each piece
                           work_buffer = _create_result_buffer(tensor.data_type, value_shape, "#{tensor.name}/out")
-                          out = []
                           start = 0
                           steps = num_split.dup.reverse.drop(1).inject([0]) do |a, s|
@@ -157,14 +158,15 @@ module TensorStream
                                                                 event_wait_list: event_wait_list)
                           end
                           work_buffer.op = events
-                          new_shapes.each_with_index do |new_shape, index|
-                            element_count = new_shape.reduce(:*) || 1
+                          new_shapes.each_with_index do |ns, index|
+                            element_count = ns.reduce(:*) || 1
                             region_size_in_bytes = element_count * work_buffer.buffer.element_size
-                            out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
+                            out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
                             start += region_size_in_bytes
                           end
-                          out
                         end
+                        out
                       end
             TensorStream::Evaluator::OutputGroup.new(outputs, outputs.map(&:data_type))
@@ -195,58 +197,57 @@ module TensorStream
             output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
             ops = if axis.zero? # fast path
-              inputs.each_with_index.map do |input, index|
-                next if input.empty_value?
-                start = index * input.buffer.size * input.buffer.element_size
-                region = [input.buffer.size * input.buffer.element_size, 1, 1]
-                event_wait_list = build_event_wait_list(input)
-                _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
-                      region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
-              end.compact
-            else
-              elem_size = shape.empty? ? 1 : shape.reduce(:*)
-              cl_n = OpenCL::Int1.new(elem_size)
+                    inputs.each_with_index.map do |input, index|
+                      next if input.empty_value?
-              steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
-                a << shape[axis] + a.last
-              end
+                      start = index * input.buffer.size * input.buffer.element_size
+                      region = [input.buffer.size * input.buffer.element_size, 1, 1]
+                      event_wait_list = build_event_wait_list(input)
+                      _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
+                            region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
+                    end.compact
+                  else
+                    elem_size = shape.empty? ? 1 : shape.reduce(:*)
+                    cl_n = OpenCL::Int1.new(elem_size)
-              work_group = [elem_size]
-              event_wait_list = build_event_wait_list(inputs)
+                    steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
+                      a << shape[axis] + a.last
+                    end
+                    work_group = [elem_size]
+                    event_wait_list = build_event_wait_list(inputs)
+                    inputs.each_with_index.map do |input, index|
+                      cl_index = OpenCL::Int1.new(index)
+                      step = OpenCL::Int1.new(steps[index])
+                      _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
+                                    concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
+                                          output_buffer.cl_buffer, event_wait_list: event_wait_list)
+                    end
+                  end
-              inputs.each_with_index.map do |input, index|
-                cl_index = OpenCL::Int1.new(index)
-                step = OpenCL::Int1.new(steps[index])
-                _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
-                              concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
-                                     output_buffer.cl_buffer, event_wait_list: event_wait_list)
-              end
-            end
             output_buffer.op = ops
             output_buffer
           end
-          register_op :squeeze do |context, tensor, inputs|
+          register_op :squeeze do |_context, tensor, inputs|
             arr = inputs[0]
             shape = inputs[0].shape.dup
             axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
             if !axis.empty?
-              axis.each do |axis|
-                if shape[axis] == 1
-                  shape[axis] = nil
-                else
-                  raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
-                end
+              axis.each do |x|
+                raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1" unless shape[x] == 1
+                shape[x] = nil
               end
             else
               shape = shape.map { |s| s == 1 ? nil : s }
             end
             OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
-              shape: shape.compact, buffer: arr.buffer,
-              cl_buffer: arr.cl_buffer,
-              op: arr.op)
+                                   shape: shape.compact, buffer: arr.buffer,
+                                   cl_buffer: arr.cl_buffer,
+                                   op: arr.op)
           end
           register_op :stack do |_context, tensor, inputs|
@@ -312,7 +313,6 @@ module TensorStream
               a << s * a.last
             end.reverse
-            step = multipliers[0]
             sub_shape = new_shape.dup
             sub_shape.shift
@@ -375,9 +375,9 @@ module TensorStream
                     end
             OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
-                             shape: shape, buffer: arr.buffer,
-                             cl_buffer: arr.cl_buffer,
-                             op: arr.op)
+                                   shape: shape, buffer: arr.buffer,
+                                   cl_buffer: arr.cl_buffer,
+                                   op: arr.op)
           end
           register_op :transpose, buffer: true do |_context, tensor, inputs|
@@ -407,7 +407,10 @@ module TensorStream
             shape = input_a.shape
-            slice_param = input_b.zip(size).collect.with_index { | p, index|  p[1] = (p[1] == -1) ? shape[index] : p[1] ; p[0]..p[0] + p[1] - 1 }.reverse
+            slice_param = input_b.zip(size).collect.with_index do |p, index|
+              p[1] = p[1] == -1 ? shape[index] : p[1]
+              p[0]..p[0] + p[1] - 1
+            end.reverse
             new_buf = input_a.buffer.reshape(*input_a.shape.reverse)
             sliced = new_buf.slice[*slice_param]
@@ -423,11 +426,11 @@ module TensorStream
             if a.data_type != tensor.data_type
               buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
               work_group = if inputs[0].shape.size > 2
-                              [ inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
-                            else
-                              m, n = inputs[0].shape
-                              [m || 1, n || 1]
-                            end
+                             [inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
+                           else
+                             m, n = inputs[0].shape
+                             [m || 1, n || 1]
+                           end
               cl_m = OpenCL::Int1.new(work_group[0])
               cl_n = OpenCL::Int1.new(work_group[1])

data/lib/tensor_stream/opencl/nn_ops.rb CHANGED Viewed

@@ -11,12 +11,12 @@ module TensorStream
             assign = tensor.inputs[0] || tensor
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
-            event_wait_list = build_event_wait_list([assign.buffer, learning_rate, delta])
+            event_wait_list = build_event_wait_list([assign.container_buffer, learning_rate, delta])
             event = call_program("apply_gradient", output_buffer.data_type,
                            work_group,
@@ -33,21 +33,21 @@ module TensorStream
             assign = tensor.inputs[0] || tensor
             assign_acc = tensor.inputs[1]
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
-            event_wait_list = build_event_wait_list([assign.buffer, assign_acc.buffer, learning_rate, grad, momentum])
+            event_wait_list = build_event_wait_list([assign.container_buffer, assign_acc.container_buffer, learning_rate, grad, momentum])
             method_call = :"apply_momentum_#{output_buffer.data_type}"
             event = _cl_program("apply_momentum", nesterov: tensor.options[:use_nesterov], dtype: output_buffer.data_type).
                         send(method_call, _opencl_queue, work_group, grad.cl_buffer,
                             learning_rate.cl_buffer, momentum.cl_buffer, output_buffer.cl_buffer,
-                            assign_acc.buffer.cl_buffer, event_wait_list: event_wait_list)
+                            assign_acc.container_buffer.cl_buffer, event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_acc.buffer.op = event
+            assign_acc.container_buffer.op = event
             output_buffer
           end
@@ -58,11 +58,11 @@ module TensorStream
             assign_acc_update = tensor.inputs[2]
             # mark variable buffers as dirty
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_acc_update.buffer.dirty = true # force buffer copy when variable is read externally
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_acc_update.container_buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
@@ -73,13 +73,13 @@ module TensorStream
                                       rho.cl_buffer,
                                       epsilon.cl_buffer,
                                       grad.cl_buffer,
-                                      assign.buffer.cl_buffer,
-                                      assign_acc.buffer.cl_buffer,
-                                      assign_acc_update.buffer.cl_buffer,
+                                      assign.container_buffer.cl_buffer,
+                                      assign_acc.container_buffer.cl_buffer,
+                                      assign_acc_update.container_buffer.cl_buffer,
                                       event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_acc.buffer.op = event
-            assign_acc_update.buffer.op = event
+            assign_acc.container_buffer.op = event
+            assign_acc_update.container_buffer.op = event
             output_buffer
           end
@@ -92,11 +92,11 @@ module TensorStream
             assign_v = tensor.inputs[2]
             # mark variable buffers as dirty
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_m.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_v.buffer.dirty = true # force buffer copy when variable is read externally
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_m.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_v.container_buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
@@ -110,13 +110,13 @@ module TensorStream
                                       beta1_t.cl_buffer,
                                       beta2_t.cl_buffer,
                                       epsilon_t.cl_buffer,
-                                      assign_m.buffer.cl_buffer,
-                                      assign.buffer.cl_buffer,
-                                      assign_v.buffer.cl_buffer,
+                                      assign_m.container_buffer.cl_buffer,
+                                      assign.container_buffer.cl_buffer,
+                                      assign_v.container_buffer.cl_buffer,
                                       event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_m.buffer.op = event
-            assign_v.buffer.op = event
+            assign_m.container_buffer.op = event
+            assign_v.container_buffer.op = event
             output_buffer
           end
@@ -126,9 +126,9 @@ module TensorStream
             assign = tensor.inputs[0] || tensor
             assign_acc = tensor.inputs[1]
-            assign.buffer.dirty = true
-            assign_acc.buffer.dirty = true
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true
+            assign_acc.container_buffer.dirty = true
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
@@ -138,11 +138,11 @@ module TensorStream
                                       work_group,
                                       lr.cl_buffer,
                                       grad.cl_buffer,
-                                      assign.buffer.cl_buffer,
-                                      assign_acc.buffer.cl_buffer,
+                                      assign.container_buffer.cl_buffer,
+                                      assign_acc.container_buffer.cl_buffer,
                                       event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_acc.buffer.op = event
+            assign_acc.container_buffer.op = event
             output_buffer
           end
@@ -154,11 +154,11 @@ module TensorStream
             assign_ms = tensor.inputs[2]
             assign_mom = tensor.inputs[3]
-            assign.buffer.dirty = true
-            assign_mg.buffer.dirty = true
-            assign_ms.buffer.dirty = true
-            assign_mom.buffer.dirty = true
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true
+            assign_mg.container_buffer.dirty = true
+            assign_ms.container_buffer.dirty = true
+            assign_mom.container_buffer.dirty = true
+            output_buffer = assign.container_buffer
             event_wait_list = build_event_wait_list(inputs)
             work_group = [output_buffer.total_elements]
@@ -168,30 +168,30 @@ module TensorStream
                             momentum.cl_buffer,
                             epsilon.cl_buffer,
                             grad.cl_buffer,
-                            assign.buffer.cl_buffer,
-                            assign_ms.buffer.cl_buffer,
-                            assign_mg.buffer.cl_buffer,
-                            assign_mom.buffer.cl_buffer,
+                            assign.container_buffer.cl_buffer,
+                            assign_ms.container_buffer.cl_buffer,
+                            assign_mg.container_buffer.cl_buffer,
+                            assign_mom.container_buffer.cl_buffer,
                             event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_mg.buffer.op = event
-            assign_ms.buffer.op = event
-            assign_mom.buffer.op = event
+            assign_mg.container_buffer.op = event
+            assign_ms.container_buffer.op = event
+            assign_mom.container_buffer.op = event
             output_buffer
           end
-          register_op :apply_rms_prop do |context, tensor, inputs|
+          register_op :apply_rms_prop do |_context, tensor, inputs|
             var, ms, mom, lr, rho, momentum, epsilon, grad = inputs
             assign = tensor.inputs[0]
             assign_ms = tensor.inputs[1]
             assign_mom = tensor.inputs[2]
-            assign.buffer.dirty = true
-            assign_ms.buffer.dirty = true
-            assign_mom.buffer.dirty = true
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true
+            assign_ms.container_buffer.dirty = true
+            assign_mom.container_buffer.dirty = true
+            output_buffer = assign.container_buffer
             event_wait_list = build_event_wait_list(inputs)
             work_group = [output_buffer.total_elements]
@@ -202,14 +202,14 @@ module TensorStream
                             momentum.cl_buffer,
                             epsilon.cl_buffer,
                             grad.cl_buffer,
-                            assign.buffer.cl_buffer,
-                            assign_ms.buffer.cl_buffer,
-                            assign_mom.buffer.cl_buffer,
+                            assign.container_buffer.cl_buffer,
+                            assign_ms.container_buffer.cl_buffer,
+                            assign_mom.container_buffer.cl_buffer,
                             event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_ms.buffer.op = event
-            assign_mom.buffer.op = event
+            assign_ms.container_buffer.op = event
+            assign_mom.container_buffer.op = event
             output_buffer
           end
@@ -273,7 +273,7 @@ module TensorStream
             output_buffer_backprop.op = event
             loss = reduction(context, tensor, output_buffer, rank, :sum)
-            TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop],  [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
+            TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
           end
           register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
@@ -370,6 +370,7 @@ module TensorStream
             raise TensorStream::ValueError, " Current implementation does not yet support strides in the batch and depth dimensions." if strides[0] != 1 || strides[3] != 1
             padding_option = tensor.options[:padding]
             padding = conv2d_padding_options(padding_option, filter_shape, height, width, height_stride, width_stride)
             event_wait_list = build_event_wait_list(inputs)

data/lib/tensor_stream/opencl/opencl_buffer.rb CHANGED Viewed

@@ -33,13 +33,18 @@ module TensorStream
       end
       if shape.empty?
-        return buffer.to_s if data_type == :string
-        return buffer[0] != 0 if data_type == :boolean
-        return buffer[0]
+        return case data_type
+               when :string
+                 buffer.to_s
+               when :boolean
+                 buffer[0] != 0
+               else
+                 buffer[0]
+               end
       end
-      result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
-      data_type == :boolean ? process_function_op(result, ->(a, _b) { a != 0 }) : result
+      result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
+      data_type == :boolean ? process_function_op(result) { |a, _b|  a != 0 } : result
     end
     def self.nil_buffer(owner, name, data_type)

data/lib/tensor_stream/opencl/opencl_evaluator.rb CHANGED Viewed

@@ -225,7 +225,6 @@ module TensorStream
       def prepare_input(tensor, context, options = {})
         return nil unless tensor
-        tensor = resolve_placeholder(tensor)
         if options[:noop]
           tensor
         elsif options[:buffer]
@@ -329,30 +328,18 @@ module TensorStream
         tensor = tensor.call if tensor.is_a?(Proc)
         child_context = execution_context.dup
-        res = if tensor.is_a?(Operation)
-                if !on_same_device?(tensor) # tensor is on another device or evaluator
-                  perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
-                else
-                  eval_operation(tensor, child_context)
-                end
-              elsif tensor.is_a?(Variable)
-                eval_variable(tensor, child_context)
-              elsif tensor.is_a?(Placeholder)
-                resolve_placeholder(tensor, child_context)
+        res = if !on_same_device?(tensor) # tensor is on another device or evaluator
+                perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
+              elsif tensor.is_a?(Operation)
+                eval_operation(tensor, child_context)
               else
-                eval_tensor(tensor, child_context)
+                raise "invalid tensor type!"
               end
         execution_context.deep_merge!(returns: child_context[:returns])
         res
       end
-      def eval_variable(tensor, _child_context)
-        raise "variable #{tensor.name} not initalized" if tensor.value.nil? && (tensor.buffer.nil? || !tensor.buffer.dirty)
-        tensor.buffer = wrap_opencl(tensor, name: tensor.name) if tensor.buffer.nil?
-        tensor.buffer
-      end
       register_op :no_op do |_context, _tensor, _inputs|
       end
@@ -396,14 +383,14 @@ module TensorStream
       end
       %i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
-        register_op op do |context, tensor, inputs|
+        register_op op do |_context, tensor, inputs|
           execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], 'cond')
         end
       end
       register_op :where, noop: true do |context, tensor, inputs|
-        pred = tensor.options[:pred]
-        execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
+        pred = inputs[0]
+        execute_cond_func('where', tensor, pred, inputs[1], inputs[2], context)
       end
       register_op :check_numerics, noop: true do |context, tensor, inputs|
@@ -455,10 +442,36 @@ module TensorStream
         nil
       end
+      register_op :const do |_context, tensor, inputs|
+        wrap_opencl(tensor.const_value, name: tensor.name, data_type: tensor.data_type)
+      end
       register_op :size do |_context, tensor, inputs|
         wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
       end
+      register_op :restore_ts do |context, tensor, inputs|
+        inputs = inputs.dup
+        filename = inputs.shift
+        tensor_names = inputs
+        filename = read_final_result(complete_eval(filename, context))
+        tensor_names.map! { |n| read_final_result(complete_eval(n, context)) }
+        input_dump = YAML.safe_load(File.read(filename), [Symbol])
+        vars = tensor.graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
+        vars.select! { |v| input_dump['variables'].key?(v.name) && tensor_names.include?(v.name) }
+        vars.each do |variable|
+          data = TensorStream::Packer.unpack(Zlib::Inflate.inflate(Base64.decode64(input_dump['variables'][variable.name]['data'])), variable.data_type)
+          shape = input_dump['variables'][variable.name]['shape']
+          variable.buffer = convert_to_opencl(data, shape, data_type: variable.data_type, name: variable.name)
+          variable.value = TensorShape.reshape(data, shape)
+        end
+        nil
+      end
       def eval_operation(tensor, child_context)
         cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
         return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
@@ -514,7 +527,7 @@ module TensorStream
         # File.write('/home/jedld/workspace/tensor_stream/samples/error.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
         # File.write('/Users/josephemmanueldayo/workspace/gradients.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
-        raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : #{tensor.to_math(true, 1)} defined at #{tensor.source}"
+        raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : defined at #{tensor.source}"
       end
       def eval_tensor(tensor, child_context)
@@ -539,21 +552,21 @@ module TensorStream
         assign = tensor.inputs[0] || tensor
         buffer = complete_eval(b, child_context)
-        if assign.buffer
-          event_wait_list = build_event_wait_list([buffer, assign.buffer])
-          assign.buffer.op = if assign.buffer.cl_buffer != buffer.cl_buffer
-                               _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: event_wait_list)
-                             else
-                               buffer.op
-                             end
+        if assign.container_buffer
+          event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
+          assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
+                                         _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
+                                       else
+                                         buffer.op
+                                       end
         else
           value = read_final_result(buffer)
-          assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
-          assign.value = value
+          assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
+          assign.options[:container].value = value
         end
-        assign.buffer.dirty = true
-        assign.buffer
+        assign.container_buffer.dirty = true
+        assign.container_buffer
       end
       def execute_2_operand_func(op_name, tensor, a, b, prog_name = nil)
@@ -572,7 +585,7 @@ module TensorStream
                        [m || 1, n || 1]
                      elsif (b.shape.size == 1) && (result_shape.last == b.shape.last)
                       last_dim = b.shape.last
-                      [result_shape.reduce(:*) / last_dim, last_dim]
+                      [result_shape.reduce(:*) / last_dim, last_dim]
                      else
                        raise "rank > 2 not supported for now"
                      end
@@ -622,7 +635,7 @@ module TensorStream
         work_group = if p.shape.size > 2
                        [m, p.shape.reduce(:*) / m]
                      else
-                       [ m || 1, n || 1]
+                       [m || 1, n || 1]
                      end
         cl_m = OpenCL::Int1.new(work_group[0])

data/lib/tensor_stream/opencl/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module TensorStream
   module Opencl
-    VERSION = "0.2.4"
+    VERSION = "0.2.5"
   end
 end

data/lib/tensor_stream/opencl.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require "tensor_stream/opencl/version"
+require 'tensor_stream'
 require "tensor_stream/opencl/opencl_evaluator"
 module TensorStream

data/samples/classify.rb ADDED Viewed

@@ -0,0 +1,21 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "tensor_stream"
+require 'mnist-learn'
+require 'fileutils'
+file_path = ARGV[0]
+model_path = ARGV[1]
+decoded_image = TensorStream.image.decode_png(File.read(file_path), channels: 1)
+target_graph = TensorStream::YamlLoader.new.load_from_file(model_path)
+input = target_graph['Placeholder']
+output = TensorStream.argmax(target_graph['out'], 1)
+sess = TensorStream.session
+reshaped_image = 255.0.t - decoded_image.reshape([1, 28, 28, 1]).cast(:float32)
+result = sess.run(output, feed_dict: { input => reshaped_image})
+puts "image is a #{result.first}"

data/samples/dump_mnist.rb ADDED Viewed

@@ -0,0 +1,21 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "tensor_stream"
+require 'mnist-learn'
+require 'fileutils'
+mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
+ts = TensorStream
+test_data = mnist.test.images
+FileUtils.mkdir_p 'test_images'
+sess = ts.session
+test_data.each_with_index do |image , index|
+  image = 255.t - ts.cast(ts.reshape(image, [28, 28, 1]), :uint8) # reshape image
+  encoder = ts.image.encode_png(image)
+  blob = sess.run(encoder)
+  File.write(File.join('test_images', "#{index}_image.png"), blob)
+end

data/samples/image_sort.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require "bundler/setup"
+require 'tensor_stream'
+require 'mnist-learn'
+require 'csv'
+# Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
+require 'tensor_stream/opencl'

data/samples/mnist_data_2.3.rb CHANGED Viewed

@@ -54,18 +54,10 @@ b5 = tf.variable(tf.zeros([10]))
 x_ = tf.reshape(x, [-1, 784])
 y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
-y1d = tf.nn.dropout(y1, pkeep)
-y2 = tf.nn.relu(tf.matmul(y1d, w2) + b2)
-y2d = tf.nn.dropout(y2, pkeep)
-y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
-y3d = tf.nn.dropout(y3, pkeep)
-y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
-y4d = tf.nn.dropout(y4, pkeep)
-ylogits = tf.matmul(y4d, w5) + b5
+y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
+y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
+y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
+ylogits = tf.matmul(y4, w5) + b5
 # model
 y = tf.nn.softmax(ylogits)

data/samples/mnist_data_3.0.rb CHANGED Viewed

@@ -10,6 +10,7 @@ require "bundler/setup"
 require 'tensor_stream'
 require 'mnist-learn'
 require 'pry-byebug'
+require 'csv'
 # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
 require 'tensor_stream/opencl'
@@ -21,6 +22,7 @@ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Ope
 puts "downloading minst data"
 # Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
 mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
 puts "downloading finished"
 # neural network structure for this sample:
@@ -85,13 +87,10 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
 yy = tf.reshape(y3, [-1, 7 * 7 * M])
 y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
-# dropout to prevent overfitting
-yy4 = tf.nn.dropout(y4, pkeep)
-ylogits = tf.matmul(yy4, w5) + b5
+ylogits = tf.matmul(y4, w5) + b5
 # model
-y = tf.nn.softmax(ylogits)
+y = tf.nn.softmax(ylogits, name: 'out')
@@ -111,16 +110,21 @@ accuracy =  tf.reduce_mean(tf.cast(is_correct, :float32))
 lr = 0.0001.t +  tf.train.exponential_decay(0.003, step, 2000, 1/Math::E)
 train_step = TensorStream::Train::AdamOptimizer.new(lr).minimize(cross_entropy)
-sess = tf.session
+sess = tf.session(profile_enabled: true)
 # Add ops to save and restore all the variables.
 init = tf.global_variables_initializer
 sess.run(init)
+#Setup save and restore
+model_save_path = "test_models/mnist_data_3.0"
+saver = tf::Train::Saver.new
+saver.restore(sess, model_save_path)
 mnist_train = mnist.train
 test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
 (0..10001).each do |i|
   # load batch of images and correct answers
   batch_x, batch_y = mnist_train.next_batch(100)
@@ -130,7 +134,8 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
   sess.run(train_step, feed_dict: train_data)
   if (i % 10 == 0)
-    # File.write("profile.json", TensorStream::ReportTool.profile_for(sess).to_json)
+    # result = TensorStream::ReportTool.profile_for(sess)
+    # File.write("profile.csv", result.map(&:to_csv).join("\n"))
     # success? add code to print it
     a_train, c_train, l = sess.run([accuracy, cross_entropy, lr], feed_dict: { x => batch_x, y_ => batch_y, step => i, pkeep => 1.0})
     puts "#{i}: accuracy:#{a_train} loss:#{c_train} (lr:#{l})"
@@ -140,6 +145,9 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
     # success on test data?
     a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data, pkeep => 1.0)
     puts("#{i}: ******** test accuracy: #{a_test} test loss: #{c_test}")
+    # save current state of the model
+    save_path = saver.save(sess, model_save_path)
   end
 end

data/tensor_stream-opencl.gemspec CHANGED Viewed

@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "awesome_print"
   spec.add_development_dependency "mnist-learn"
   spec.add_development_dependency "simplecov"
-  spec.add_dependency "tensor_stream", "~> 0.9.8"
+  spec.add_dependency "tensor_stream", "1.0.0-rc1"
   spec.add_dependency "opencl_ruby_ffi"
   spec.add_dependency "oily_png"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tensor_stream-opencl
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Joseph Dayo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-11-25 00:00:00.000000000 Z
+date: 2019-01-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -112,16 +112,16 @@ dependencies:
   name: tensor_stream
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - '='
       - !ruby/object:Gem::Version
-        version: 0.9.8
+        version: 1.0.0.pre.rc1
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - '='
       - !ruby/object:Gem::Version
-        version: 0.9.8
+        version: 1.0.0.pre.rc1
 - !ruby/object:Gem::Dependency
   name: opencl_ruby_ffi
   requirement: !ruby/object:Gem::Requirement
@@ -246,6 +246,9 @@ files:
 - lib/tensor_stream/opencl/opencl_evaluator.rb
 - lib/tensor_stream/opencl/opencl_template_helper.rb
 - lib/tensor_stream/opencl/version.rb
+- samples/classify.rb
+- samples/dump_mnist.rb
+- samples/image_sort.rb
 - samples/iris.data
 - samples/iris.rb
 - samples/logistic_regression.rb