RubyGems - tensor_stream-opencl - Versions diffs - 0.2.4 → 0.2.5 - Mend

tensor_stream-opencl 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

checksums.yaml +4 -4
data/.gitignore +3 -0
data/.rubocop.yml +6 -1
data/lib/tensor_stream/opencl/array_ops.rb +58 -55
data/lib/tensor_stream/opencl/nn_ops.rb +57 -56
data/lib/tensor_stream/opencl/opencl_buffer.rb +11 -6
data/lib/tensor_stream/opencl/opencl_evaluator.rb +49 -36
data/lib/tensor_stream/opencl/version.rb +1 -1
data/lib/tensor_stream/opencl.rb +1 -0
data/samples/classify.rb +21 -0
data/samples/dump_mnist.rb +21 -0
data/samples/image_sort.rb +9 -0
data/samples/mnist_data_2.3.rb +4 -12
data/samples/mnist_data_3.0.rb +16 -8
data/tensor_stream-opencl.gemspec +1 -1
metadata +9 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3e4aa123289372c651cd4da3e7c206abc4f9f67a551d4062180c5cf6555dc243
-  data.tar.gz: 6517954207c85f56cd08b2892b0119d4bb7a35e2d4bd9b9cacc5d3c9ccfb9e42
+  metadata.gz: c0e8de1676b30c21f9529cdce9d3fee406cdf2945d54f890ae49c14c1329860e
+  data.tar.gz: 66932db63589eedcd6247083a27344bed8c80860b6d096d01fb5b46db1b53521
 SHA512:
-  metadata.gz: 7f61d61be79dd1e06ebfdc77ed2dff9e717e0cdb292160fe20c9ca08693d867e1b0e0350c71db5d24feb4671a26e793f44d6b80762c384193c1985b6b1616376
-  data.tar.gz: 72c32530717fac8ff947ce4b204535755134bde14e0f70d0d120ff101b5654843312186317cb480fd5e1c620a25328a3590b1f35193faf1d196e7ad631d169b0
+  metadata.gz: 5b905243d98976c94cb58dd443fc25f0b1e78bba689f20677ee3457dc44641d228642eb65adc4e58227d1feb0686fe29a3cabd8b25910f8e9c1aa0ef575ae8ff
+  data.tar.gz: 03ad5c5cd27ff058df206de8e109699fe0ef492e8ced1b97217cc0fe1ab7a4e43da5db45fed85b2e96b7bb35dc14465e39695b7995b3a1ccfcece7c050e0cae3

data/.gitignore CHANGED Viewed

@@ -1,6 +1,7 @@
 /.bundle/
 /.yardoc
 /_yardoc/
+/test_images/
 /coverage/
 /doc/
 /pkg/
@@ -10,6 +11,8 @@ Gemfile.lock
 *.gem
 *.ckpt
 profile.json
+profile.csv
+/test_models/
 # rspec failure tracking
 .rspec_status

data/.rubocop.yml CHANGED Viewed

@@ -34,6 +34,7 @@ Metrics/CyclomaticComplexity:
 Metrics/BlockLength:
   Exclude:
     - lib/tensor_stream/math_gradients.rb
+    - benchmark/benchmark.rb
 Naming/AccessorMethodName:
   Exclude:
@@ -86,4 +87,8 @@ Style/TrailingCommaInHashLiteral:
 Naming/UncommunicativeMethodParamName:
   Exclude:
     - lib/tensor_stream/evaluator/ruby_evaluator.rb
-    - lib/tensor_stream/ops.rb
+    - lib/tensor_stream/ops.rb
+Style/BlockDelimiters:
+  Exclude:
+    - benchmark/benchmark.rb

data/lib/tensor_stream/opencl/array_ops.rb CHANGED Viewed

@@ -103,26 +103,27 @@ module TensorStream
                         end
                       else
                         raise TensorStream::ValueError, "#{num_split} does not divide #{value_shape[axis]} evenly" if num_split.reduce(:+) != value_shape[axis]
                         # compute shapes of individual output buffers
                         new_shapes = num_split.each_with_index.collect do |num, index|
                                        new_shape = value_shape.dup
                                        new_shape[axis] = num
                                        new_shape
                                      end
+                        out = []
                         if axis.zero? # axis zero fast copy path
                           start = 0
-                          out = []
-                          new_shapes.each_with_index do |new_shape, index|
-                            element_count = new_shape.reduce(:*) || 1
+                          new_shapes.each_with_index do |ns, index|
+                            element_count = ns.reduce(:*) || 1
                             region_size_in_bytes = element_count * value.buffer.element_size
-                            out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
+                            out << _create_variable_result_sub_buffer(value, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{ns.join('.')}")
                             start += region_size_in_bytes
                           end
-                          out
                         else
                           # create buffers for each piece
                           work_buffer = _create_result_buffer(tensor.data_type, value_shape, "#{tensor.name}/out")
-                          out = []
                           start = 0
                           steps = num_split.dup.reverse.drop(1).inject([0]) do |a, s|
@@ -157,14 +158,15 @@ module TensorStream
                                                                 event_wait_list: event_wait_list)
                           end
                           work_buffer.op = events
-                          new_shapes.each_with_index do |new_shape, index|
-                            element_count = new_shape.reduce(:*) || 1
+                          new_shapes.each_with_index do |ns, index|
+                            element_count = ns.reduce(:*) || 1
                             region_size_in_bytes = element_count * work_buffer.buffer.element_size
-                            out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, new_shape, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
+                            out << _create_variable_result_sub_buffer(work_buffer, index, start, region_size_in_bytes, tensor.data_type, ns, "#{tensor.name}/out_#{index}_#{new_shape.join('.')}")
                             start += region_size_in_bytes
                           end
-                          out
                         end
+                        out
                       end
             TensorStream::Evaluator::OutputGroup.new(outputs, outputs.map(&:data_type))
@@ -195,58 +197,57 @@ module TensorStream
             output_buffer = _create_result_buffer(tensor.data_type, new_shape, tensor.name)
             ops = if axis.zero? # fast path
-              inputs.each_with_index.map do |input, index|
-                next if input.empty_value?
-                start = index * input.buffer.size * input.buffer.element_size
-                region = [input.buffer.size * input.buffer.element_size, 1, 1]
-                event_wait_list = build_event_wait_list(input)
-                _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
-                      region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
-              end.compact
-            else
-              elem_size = shape.empty? ? 1 : shape.reduce(:*)
-              cl_n = OpenCL::Int1.new(elem_size)
+                    inputs.each_with_index.map do |input, index|
+                      next if input.empty_value?
-              steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
-                a << shape[axis] + a.last
-              end
+                      start = index * input.buffer.size * input.buffer.element_size
+                      region = [input.buffer.size * input.buffer.element_size, 1, 1]
+                      event_wait_list = build_event_wait_list(input)
+                      _opencl_queue.enqueue_copy_buffer_rect(input.cl_buffer, output_buffer.cl_buffer,
+                            region, dst_origin: [start, 0, 0], event_wait_list: event_wait_list)
+                    end.compact
+                  else
+                    elem_size = shape.empty? ? 1 : shape.reduce(:*)
+                    cl_n = OpenCL::Int1.new(elem_size)
-              work_group = [elem_size]
-              event_wait_list = build_event_wait_list(inputs)
+                    steps = inputs.map(&:shape).reverse.drop(1).inject([0]) do |a, shape|
+                      a << shape[axis] + a.last
+                    end
+                    work_group = [elem_size]
+                    event_wait_list = build_event_wait_list(inputs)
+                    inputs.each_with_index.map do |input, index|
+                      cl_index = OpenCL::Int1.new(index)
+                      step = OpenCL::Int1.new(steps[index])
+                      _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
+                                    concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
+                                          output_buffer.cl_buffer, event_wait_list: event_wait_list)
+                    end
+                  end
-              inputs.each_with_index.map do |input, index|
-                cl_index = OpenCL::Int1.new(index)
-                step = OpenCL::Int1.new(steps[index])
-                _cl_program('concat', data_type: tensor.data_type, divisors: divisors, multipliers: multipliers, axis: axis).
-                              concat(_opencl_queue, work_group, cl_n, cl_index, step, input.cl_buffer,
-                                     output_buffer.cl_buffer, event_wait_list: event_wait_list)
-              end
-            end
             output_buffer.op = ops
             output_buffer
           end
-          register_op :squeeze do |context, tensor, inputs|
+          register_op :squeeze do |_context, tensor, inputs|
             arr = inputs[0]
             shape = inputs[0].shape.dup
             axis = !tensor.options[:axis].is_a?(Array) ? [tensor.options[:axis]] : tensor.options[:axis]
             if !axis.empty?
-              axis.each do |axis|
-                if shape[axis] == 1
-                  shape[axis] = nil
-                else
-                  raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1"
-                end
+              axis.each do |x|
+                raise TensorStream::ValueError, "unable to squeeze dimension that does not have a size of 1" unless shape[x] == 1
+                shape[x] = nil
               end
             else
               shape = shape.map { |s| s == 1 ? nil : s }
             end
             OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
-              shape: shape.compact, buffer: arr.buffer,
-              cl_buffer: arr.cl_buffer,
-              op: arr.op)
+                                   shape: shape.compact, buffer: arr.buffer,
+                                   cl_buffer: arr.cl_buffer,
+                                   op: arr.op)
           end
           register_op :stack do |_context, tensor, inputs|
@@ -312,7 +313,6 @@ module TensorStream
               a << s * a.last
             end.reverse
-            step = multipliers[0]
             sub_shape = new_shape.dup
             sub_shape.shift
@@ -375,9 +375,9 @@ module TensorStream
                     end
             OpenCLBuffer.new(self, name: tensor.name, data_type: tensor.data_type,
-                             shape: shape, buffer: arr.buffer,
-                             cl_buffer: arr.cl_buffer,
-                             op: arr.op)
+                                   shape: shape, buffer: arr.buffer,
+                                   cl_buffer: arr.cl_buffer,
+                                   op: arr.op)
           end
           register_op :transpose, buffer: true do |_context, tensor, inputs|
@@ -407,7 +407,10 @@ module TensorStream
             shape = input_a.shape
-            slice_param = input_b.zip(size).collect.with_index { | p, index|  p[1] = (p[1] == -1) ? shape[index] : p[1] ; p[0]..p[0] + p[1] - 1 }.reverse
+            slice_param = input_b.zip(size).collect.with_index do |p, index|
+              p[1] = p[1] == -1 ? shape[index] : p[1]
+              p[0]..p[0] + p[1] - 1
+            end.reverse
             new_buf = input_a.buffer.reshape(*input_a.shape.reverse)
             sliced = new_buf.slice[*slice_param]
@@ -423,11 +426,11 @@ module TensorStream
             if a.data_type != tensor.data_type
               buffer = _create_result_buffer(tensor.data_type, a.shape, tensor.name)
               work_group = if inputs[0].shape.size > 2
-                              [ inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
-                            else
-                              m, n = inputs[0].shape
-                              [m || 1, n || 1]
-                            end
+                             [inputs[0].shape.reduce(:*) / inputs[0].shape.last, inputs[0].shape.last]
+                           else
+                             m, n = inputs[0].shape
+                             [m || 1, n || 1]
+                           end
               cl_m = OpenCL::Int1.new(work_group[0])
               cl_n = OpenCL::Int1.new(work_group[1])

data/lib/tensor_stream/opencl/nn_ops.rb CHANGED Viewed

@@ -11,12 +11,12 @@ module TensorStream
             assign = tensor.inputs[0] || tensor
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
-            event_wait_list = build_event_wait_list([assign.buffer, learning_rate, delta])
+            event_wait_list = build_event_wait_list([assign.container_buffer, learning_rate, delta])
             event = call_program("apply_gradient", output_buffer.data_type,
                            work_group,
@@ -33,21 +33,21 @@ module TensorStream
             assign = tensor.inputs[0] || tensor
             assign_acc = tensor.inputs[1]
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
-            event_wait_list = build_event_wait_list([assign.buffer, assign_acc.buffer, learning_rate, grad, momentum])
+            event_wait_list = build_event_wait_list([assign.container_buffer, assign_acc.container_buffer, learning_rate, grad, momentum])
             method_call = :"apply_momentum_#{output_buffer.data_type}"
             event = _cl_program("apply_momentum", nesterov: tensor.options[:use_nesterov], dtype: output_buffer.data_type).
                         send(method_call, _opencl_queue, work_group, grad.cl_buffer,
                             learning_rate.cl_buffer, momentum.cl_buffer, output_buffer.cl_buffer,
-                            assign_acc.buffer.cl_buffer, event_wait_list: event_wait_list)
+                            assign_acc.container_buffer.cl_buffer, event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_acc.buffer.op = event
+            assign_acc.container_buffer.op = event
             output_buffer
           end
@@ -58,11 +58,11 @@ module TensorStream
             assign_acc_update = tensor.inputs[2]
             # mark variable buffers as dirty
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_acc.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_acc_update.buffer.dirty = true # force buffer copy when variable is read externally
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_acc.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_acc_update.container_buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
@@ -73,13 +73,13 @@ module TensorStream
                                       rho.cl_buffer,
                                       epsilon.cl_buffer,
                                       grad.cl_buffer,
-                                      assign.buffer.cl_buffer,
-                                      assign_acc.buffer.cl_buffer,
-                                      assign_acc_update.buffer.cl_buffer,
+                                      assign.container_buffer.cl_buffer,
+                                      assign_acc.container_buffer.cl_buffer,
+                                      assign_acc_update.container_buffer.cl_buffer,
                                       event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_acc.buffer.op = event
-            assign_acc_update.buffer.op = event
+            assign_acc.container_buffer.op = event
+            assign_acc_update.container_buffer.op = event
             output_buffer
           end
@@ -92,11 +92,11 @@ module TensorStream
             assign_v = tensor.inputs[2]
             # mark variable buffers as dirty
-            assign.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_m.buffer.dirty = true # force buffer copy when variable is read externally
-            assign_v.buffer.dirty = true # force buffer copy when variable is read externally
+            assign.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_m.container_buffer.dirty = true # force buffer copy when variable is read externally
+            assign_v.container_buffer.dirty = true # force buffer copy when variable is read externally
-            output_buffer = assign.buffer
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
@@ -110,13 +110,13 @@ module TensorStream
                                       beta1_t.cl_buffer,
                                       beta2_t.cl_buffer,
                                       epsilon_t.cl_buffer,
-                                      assign_m.buffer.cl_buffer,
-                                      assign.buffer.cl_buffer,
-                                      assign_v.buffer.cl_buffer,
+                                      assign_m.container_buffer.cl_buffer,
+                                      assign.container_buffer.cl_buffer,
+                                      assign_v.container_buffer.cl_buffer,
                                       event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_m.buffer.op = event
-            assign_v.buffer.op = event
+            assign_m.container_buffer.op = event
+            assign_v.container_buffer.op = event
             output_buffer
           end
@@ -126,9 +126,9 @@ module TensorStream
             assign = tensor.inputs[0] || tensor
             assign_acc = tensor.inputs[1]
-            assign.buffer.dirty = true
-            assign_acc.buffer.dirty = true
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true
+            assign_acc.container_buffer.dirty = true
+            output_buffer = assign.container_buffer
             work_group = [output_buffer.total_elements]
@@ -138,11 +138,11 @@ module TensorStream
                                       work_group,
                                       lr.cl_buffer,
                                       grad.cl_buffer,
-                                      assign.buffer.cl_buffer,
-                                      assign_acc.buffer.cl_buffer,
+                                      assign.container_buffer.cl_buffer,
+                                      assign_acc.container_buffer.cl_buffer,
                                       event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_acc.buffer.op = event
+            assign_acc.container_buffer.op = event
             output_buffer
           end
@@ -154,11 +154,11 @@ module TensorStream
             assign_ms = tensor.inputs[2]
             assign_mom = tensor.inputs[3]
-            assign.buffer.dirty = true
-            assign_mg.buffer.dirty = true
-            assign_ms.buffer.dirty = true
-            assign_mom.buffer.dirty = true
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true
+            assign_mg.container_buffer.dirty = true
+            assign_ms.container_buffer.dirty = true
+            assign_mom.container_buffer.dirty = true
+            output_buffer = assign.container_buffer
             event_wait_list = build_event_wait_list(inputs)
             work_group = [output_buffer.total_elements]
@@ -168,30 +168,30 @@ module TensorStream
                             momentum.cl_buffer,
                             epsilon.cl_buffer,
                             grad.cl_buffer,
-                            assign.buffer.cl_buffer,
-                            assign_ms.buffer.cl_buffer,
-                            assign_mg.buffer.cl_buffer,
-                            assign_mom.buffer.cl_buffer,
+                            assign.container_buffer.cl_buffer,
+                            assign_ms.container_buffer.cl_buffer,
+                            assign_mg.container_buffer.cl_buffer,
+                            assign_mom.container_buffer.cl_buffer,
                             event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_mg.buffer.op = event
-            assign_ms.buffer.op = event
-            assign_mom.buffer.op = event
+            assign_mg.container_buffer.op = event
+            assign_ms.container_buffer.op = event
+            assign_mom.container_buffer.op = event
             output_buffer
           end
-          register_op :apply_rms_prop do |context, tensor, inputs|
+          register_op :apply_rms_prop do |_context, tensor, inputs|
             var, ms, mom, lr, rho, momentum, epsilon, grad = inputs
             assign = tensor.inputs[0]
             assign_ms = tensor.inputs[1]
             assign_mom = tensor.inputs[2]
-            assign.buffer.dirty = true
-            assign_ms.buffer.dirty = true
-            assign_mom.buffer.dirty = true
-            output_buffer = assign.buffer
+            assign.container_buffer.dirty = true
+            assign_ms.container_buffer.dirty = true
+            assign_mom.container_buffer.dirty = true
+            output_buffer = assign.container_buffer
             event_wait_list = build_event_wait_list(inputs)
             work_group = [output_buffer.total_elements]
@@ -202,14 +202,14 @@ module TensorStream
                             momentum.cl_buffer,
                             epsilon.cl_buffer,
                             grad.cl_buffer,
-                            assign.buffer.cl_buffer,
-                            assign_ms.buffer.cl_buffer,
-                            assign_mom.buffer.cl_buffer,
+                            assign.container_buffer.cl_buffer,
+                            assign_ms.container_buffer.cl_buffer,
+                            assign_mom.container_buffer.cl_buffer,
                             event_wait_list: event_wait_list)
             output_buffer.op = event
-            assign_ms.buffer.op = event
-            assign_mom.buffer.op = event
+            assign_ms.container_buffer.op = event
+            assign_mom.container_buffer.op = event
             output_buffer
           end
@@ -273,7 +273,7 @@ module TensorStream
             output_buffer_backprop.op = event
             loss = reduction(context, tensor, output_buffer, rank, :sum)
-            TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop],  [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
+            TensorStream::Evaluator::OutputGroup.new([loss, output_buffer_backprop], [tensor.inputs[0].data_type, tensor.inputs[0].data_type])
           end
           register_op :softmax_cross_entropy_with_logits_v2_grad do |_context, tensor, inputs|
@@ -370,6 +370,7 @@ module TensorStream
             raise TensorStream::ValueError, " Current implementation does not yet support strides in the batch and depth dimensions." if strides[0] != 1 || strides[3] != 1
             padding_option = tensor.options[:padding]
             padding = conv2d_padding_options(padding_option, filter_shape, height, width, height_stride, width_stride)
             event_wait_list = build_event_wait_list(inputs)

data/lib/tensor_stream/opencl/opencl_buffer.rb CHANGED Viewed

@@ -33,13 +33,18 @@ module TensorStream
       end
       if shape.empty?
-        return buffer.to_s if data_type == :string
-        return buffer[0] != 0 if data_type == :boolean
-        return buffer[0]
+        return case data_type
+               when :string
+                 buffer.to_s
+               when :boolean
+                 buffer[0] != 0
+               else
+                 buffer[0]
+               end
       end
-      result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
-      data_type == :boolean ? process_function_op(result, ->(a, _b) { a != 0 }) : result
+      result = buffer.reshape(*shape.map(&:to_i).reverse).to_a
+      data_type == :boolean ? process_function_op(result) { |a, _b|  a != 0 } : result
     end
     def self.nil_buffer(owner, name, data_type)

data/lib/tensor_stream/opencl/opencl_evaluator.rb CHANGED Viewed

@@ -225,7 +225,6 @@ module TensorStream
       def prepare_input(tensor, context, options = {})
         return nil unless tensor
-        tensor = resolve_placeholder(tensor)
         if options[:noop]
           tensor
         elsif options[:buffer]
@@ -329,30 +328,18 @@ module TensorStream
         tensor = tensor.call if tensor.is_a?(Proc)
         child_context = execution_context.dup
-        res = if tensor.is_a?(Operation)
-                if !on_same_device?(tensor) # tensor is on another device or evaluator
-                  perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
-                else
-                  eval_operation(tensor, child_context)
-                end
-              elsif tensor.is_a?(Variable)
-                eval_variable(tensor, child_context)
-              elsif tensor.is_a?(Placeholder)
-                resolve_placeholder(tensor, child_context)
+        res = if !on_same_device?(tensor) # tensor is on another device or evaluator
+                perform_transition(tensor, tensor, @context[:_cache][:placement][tensor.name][1], execution_context)
+              elsif tensor.is_a?(Operation)
+                eval_operation(tensor, child_context)
               else
-                eval_tensor(tensor, child_context)
+                raise "invalid tensor type!"
               end
         execution_context.deep_merge!(returns: child_context[:returns])
         res
       end
-      def eval_variable(tensor, _child_context)
-        raise "variable #{tensor.name} not initalized" if tensor.value.nil? && (tensor.buffer.nil? || !tensor.buffer.dirty)
-        tensor.buffer = wrap_opencl(tensor, name: tensor.name) if tensor.buffer.nil?
-        tensor.buffer
-      end
       register_op :no_op do |_context, _tensor, _inputs|
       end
@@ -396,14 +383,14 @@ module TensorStream
       end
       %i[less less_equal greater greater_equal equal not_equal logical_and].each do |op|
-        register_op op do |context, tensor, inputs|
+        register_op op do |_context, tensor, inputs|
           execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], 'cond')
         end
       end
       register_op :where, noop: true do |context, tensor, inputs|
-        pred = tensor.options[:pred]
-        execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
+        pred = inputs[0]
+        execute_cond_func('where', tensor, pred, inputs[1], inputs[2], context)
       end
       register_op :check_numerics, noop: true do |context, tensor, inputs|
@@ -455,10 +442,36 @@ module TensorStream
         nil
       end
+      register_op :const do |_context, tensor, inputs|
+        wrap_opencl(tensor.const_value, name: tensor.name, data_type: tensor.data_type)
+      end
       register_op :size do |_context, tensor, inputs|
         wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
       end
+      register_op :restore_ts do |context, tensor, inputs|
+        inputs = inputs.dup
+        filename = inputs.shift
+        tensor_names = inputs
+        filename = read_final_result(complete_eval(filename, context))
+        tensor_names.map! { |n| read_final_result(complete_eval(n, context)) }
+        input_dump = YAML.safe_load(File.read(filename), [Symbol])
+        vars = tensor.graph.get_collection(GraphKeys::GLOBAL_VARIABLES)
+        vars.select! { |v| input_dump['variables'].key?(v.name) && tensor_names.include?(v.name) }
+        vars.each do |variable|
+          data = TensorStream::Packer.unpack(Zlib::Inflate.inflate(Base64.decode64(input_dump['variables'][variable.name]['data'])), variable.data_type)
+          shape = input_dump['variables'][variable.name]['shape']
+          variable.buffer = convert_to_opencl(data, shape, data_type: variable.data_type, name: variable.name)
+          variable.value = TensorShape.reshape(data, shape)
+        end
+        nil
+      end
       def eval_operation(tensor, child_context)
         cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
         return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
@@ -514,7 +527,7 @@ module TensorStream
         # File.write('/home/jedld/workspace/tensor_stream/samples/error.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
         # File.write('/Users/josephemmanueldayo/workspace/gradients.graphml', TensorStream::Graphml.new.get_string(tensor, @session))
-        raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : #{tensor.to_math(true, 1)} defined at #{tensor.source}"
+        raise EvaluatorExcecutionException.new(e, tensor), "error #{e.message} while evaluating #{tensor.name} : defined at #{tensor.source}"
       end
       def eval_tensor(tensor, child_context)
@@ -539,21 +552,21 @@ module TensorStream
         assign = tensor.inputs[0] || tensor
         buffer = complete_eval(b, child_context)
-        if assign.buffer
-          event_wait_list = build_event_wait_list([buffer, assign.buffer])
-          assign.buffer.op = if assign.buffer.cl_buffer != buffer.cl_buffer
-                               _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.buffer.cl_buffer, event_wait_list: event_wait_list)
-                             else
-                               buffer.op
-                             end
+        if assign.container_buffer
+          event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
+          assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
+                                         _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
+                                       else
+                                         buffer.op
+                                       end
         else
           value = read_final_result(buffer)
-          assign.buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
-          assign.value = value
+          assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
+          assign.options[:container].value = value
         end
-        assign.buffer.dirty = true
-        assign.buffer
+        assign.container_buffer.dirty = true
+        assign.container_buffer
       end
       def execute_2_operand_func(op_name, tensor, a, b, prog_name = nil)
@@ -572,7 +585,7 @@ module TensorStream
                        [m || 1, n || 1]
                      elsif (b.shape.size == 1) && (result_shape.last == b.shape.last)
                       last_dim = b.shape.last
-                      [result_shape.reduce(:*) / last_dim, last_dim]
+                      [result_shape.reduce(:*) / last_dim, last_dim]
                      else
                        raise "rank > 2 not supported for now"
                      end
@@ -622,7 +635,7 @@ module TensorStream
         work_group = if p.shape.size > 2
                        [m, p.shape.reduce(:*) / m]
                      else
-                       [ m || 1, n || 1]
+                       [m || 1, n || 1]
                      end
         cl_m = OpenCL::Int1.new(work_group[0])

data/lib/tensor_stream/opencl/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module TensorStream
   module Opencl
-    VERSION = "0.2.4"
+    VERSION = "0.2.5"
   end
 end

data/lib/tensor_stream/opencl.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require "tensor_stream/opencl/version"
+require 'tensor_stream'
 require "tensor_stream/opencl/opencl_evaluator"
 module TensorStream

data/samples/classify.rb ADDED Viewed

@@ -0,0 +1,21 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "tensor_stream"
+require 'mnist-learn'
+require 'fileutils'
+file_path = ARGV[0]
+model_path = ARGV[1]
+decoded_image = TensorStream.image.decode_png(File.read(file_path), channels: 1)
+target_graph = TensorStream::YamlLoader.new.load_from_file(model_path)
+input = target_graph['Placeholder']
+output = TensorStream.argmax(target_graph['out'], 1)
+sess = TensorStream.session
+reshaped_image = 255.0.t - decoded_image.reshape([1, 28, 28, 1]).cast(:float32)
+result = sess.run(output, feed_dict: { input => reshaped_image})
+puts "image is a #{result.first}"

data/samples/dump_mnist.rb ADDED Viewed

@@ -0,0 +1,21 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "tensor_stream"
+require 'mnist-learn'
+require 'fileutils'
+mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
+ts = TensorStream
+test_data = mnist.test.images
+FileUtils.mkdir_p 'test_images'
+sess = ts.session
+test_data.each_with_index do |image , index|
+  image = 255.t - ts.cast(ts.reshape(image, [28, 28, 1]), :uint8) # reshape image
+  encoder = ts.image.encode_png(image)
+  blob = sess.run(encoder)
+  File.write(File.join('test_images', "#{index}_image.png"), blob)
+end

data/samples/image_sort.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require "bundler/setup"
+require 'tensor_stream'
+require 'mnist-learn'
+require 'csv'
+# Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
+require 'tensor_stream/opencl'

data/samples/mnist_data_2.3.rb CHANGED Viewed

@@ -54,18 +54,10 @@ b5 = tf.variable(tf.zeros([10]))
 x_ = tf.reshape(x, [-1, 784])
 y1 = tf.nn.relu(tf.matmul(x_, w1) + b1)
-y1d = tf.nn.dropout(y1, pkeep)
-y2 = tf.nn.relu(tf.matmul(y1d, w2) + b2)
-y2d = tf.nn.dropout(y2, pkeep)
-y3 = tf.nn.relu(tf.matmul(y2d, w3) + b3)
-y3d = tf.nn.dropout(y3, pkeep)
-y4 = tf.nn.relu(tf.matmul(y3d, w4) + b4)
-y4d = tf.nn.dropout(y4, pkeep)
-ylogits = tf.matmul(y4d, w5) + b5
+y2 = tf.nn.relu(tf.matmul(y1, w2) + b2)
+y3 = tf.nn.relu(tf.matmul(y2, w3) + b3)
+y4 = tf.nn.relu(tf.matmul(y3, w4) + b4)
+ylogits = tf.matmul(y4, w5) + b5
 # model
 y = tf.nn.softmax(ylogits)

data/samples/mnist_data_3.0.rb CHANGED Viewed

@@ -10,6 +10,7 @@ require "bundler/setup"
 require 'tensor_stream'
 require 'mnist-learn'
 require 'pry-byebug'
+require 'csv'
 # Enable OpenCL hardware accelerated computation, not using OpenCL can be very slow
 require 'tensor_stream/opencl'
@@ -21,6 +22,7 @@ puts "Tensorstream version #{tf.__version__} with OpenCL lib #{TensorStream::Ope
 puts "downloading minst data"
 # Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
 mnist = Mnist.read_data_sets('/tmp/data', one_hot: true)
 puts "downloading finished"
 # neural network structure for this sample:
@@ -85,13 +87,10 @@ y3 = tf.nn.relu(tf.nn.conv2d(y2, w3, [1, stride, stride, 1], 'SAME') + b3)
 yy = tf.reshape(y3, [-1, 7 * 7 * M])
 y4 = tf.nn.relu(tf.matmul(yy, w4) + b4)
-# dropout to prevent overfitting
-yy4 = tf.nn.dropout(y4, pkeep)
-ylogits = tf.matmul(yy4, w5) + b5
+ylogits = tf.matmul(y4, w5) + b5
 # model
-y = tf.nn.softmax(ylogits)
+y = tf.nn.softmax(ylogits, name: 'out')
@@ -111,16 +110,21 @@ accuracy =  tf.reduce_mean(tf.cast(is_correct, :float32))
 lr = 0.0001.t +  tf.train.exponential_decay(0.003, step, 2000, 1/Math::E)
 train_step = TensorStream::Train::AdamOptimizer.new(lr).minimize(cross_entropy)
-sess = tf.session
+sess = tf.session(profile_enabled: true)
 # Add ops to save and restore all the variables.
 init = tf.global_variables_initializer
 sess.run(init)
+#Setup save and restore
+model_save_path = "test_models/mnist_data_3.0"
+saver = tf::Train::Saver.new
+saver.restore(sess, model_save_path)
 mnist_train = mnist.train
 test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
 (0..10001).each do |i|
   # load batch of images and correct answers
   batch_x, batch_y = mnist_train.next_batch(100)
@@ -130,7 +134,8 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
   sess.run(train_step, feed_dict: train_data)
   if (i % 10 == 0)
-    # File.write("profile.json", TensorStream::ReportTool.profile_for(sess).to_json)
+    # result = TensorStream::ReportTool.profile_for(sess)
+    # File.write("profile.csv", result.map(&:to_csv).join("\n"))
     # success? add code to print it
     a_train, c_train, l = sess.run([accuracy, cross_entropy, lr], feed_dict: { x => batch_x, y_ => batch_y, step => i, pkeep => 1.0})
     puts "#{i}: accuracy:#{a_train} loss:#{c_train} (lr:#{l})"
@@ -140,6 +145,9 @@ test_data = { x => mnist.test.images, y_ => mnist.test.labels, pkeep => 1.0 }
     # success on test data?
     a_test, c_test = sess.run([accuracy, cross_entropy], feed_dict: test_data, pkeep => 1.0)
     puts("#{i}: ******** test accuracy: #{a_test} test loss: #{c_test}")
+    # save current state of the model
+    save_path = saver.save(sess, model_save_path)
   end
 end

data/tensor_stream-opencl.gemspec CHANGED Viewed

@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "awesome_print"
   spec.add_development_dependency "mnist-learn"
   spec.add_development_dependency "simplecov"
-  spec.add_dependency "tensor_stream", "~> 0.9.8"
+  spec.add_dependency "tensor_stream", "1.0.0-rc1"
   spec.add_dependency "opencl_ruby_ffi"
   spec.add_dependency "oily_png"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tensor_stream-opencl
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Joseph Dayo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-11-25 00:00:00.000000000 Z
+date: 2019-01-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -112,16 +112,16 @@ dependencies:
   name: tensor_stream
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - '='
       - !ruby/object:Gem::Version
-        version: 0.9.8
+        version: 1.0.0.pre.rc1
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - '='
       - !ruby/object:Gem::Version
-        version: 0.9.8
+        version: 1.0.0.pre.rc1
 - !ruby/object:Gem::Dependency
   name: opencl_ruby_ffi
   requirement: !ruby/object:Gem::Requirement
@@ -246,6 +246,9 @@ files:
 - lib/tensor_stream/opencl/opencl_evaluator.rb
 - lib/tensor_stream/opencl/opencl_template_helper.rb
 - lib/tensor_stream/opencl/version.rb
+- samples/classify.rb
+- samples/dump_mnist.rb
+- samples/image_sort.rb
 - samples/iris.data
 - samples/iris.rb
 - samples/logistic_regression.rb