tensor_stream 0.5.1 → 0.6.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
-   metadata.gz: f14dd6388d5cdd10827cebde01a9cbca0686b653
-   data.tar.gz: d2ccba35defe6474a21bd75fcb09f8d49ce42e79
+ SHA256:
+   metadata.gz: 6d647cef8f32fa7b3c10460365adfc55ccdd9872e71d453df090986349b615f5
+   data.tar.gz: baa7be71775bc5d39396343b6d4c32943cf3b79d5a2e591c885bd6fc9314883e
  SHA512:
-   metadata.gz: 244026aae6ce13d8e932deada3c169b5320df517eb5dd7db5ea8c06c1cdedc9c9829d7f149261602f502c284ffe65ae831845016d9425250b0ad9d7d66fc6a0e
-   data.tar.gz: 91811c88a464604f5ca1e776f86d0342dc316ee016d920d40d8a228e2978f6a275783c5613a97cf21af1ba9256c951ad3db777b66fae20e5d1f8f9659f170301
+   metadata.gz: d3207ef919464e696d03fe7bbd264ba606565bf09d66796d461b687f047e8b2b969259bcffaf7524677d6b22398ba32025ca8f94c33756cda3a3bb37f535a902
+   data.tar.gz: 735dbd55e54237619bb9c6653818dade6df257fb71a58cc4abd540a3053d51d318d734ba149c3889bd6403a5219166c291534a66528b83e481ba1240e2696e49
data/CHANGELOG.md CHANGED
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+ ## [0.6.0] - 2018-07-21
+ ### Added
+ - [NEW OP] fill, floor_div, dynamic_stitch, mod, range, size, squared_difference
+
+ ### Fixes
+ - [General] Some auto-differentiation fixes
+ - [softmax_cross_entropy_with_logits_v2] Use numerically stable way of calculating values
+ - Other fixes related to shape computation
+
  ## [0.5.1] - 2018-06-27
  ### Added
  - Added support for control_dependencies
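The new element-wise ops listed above follow TensorFlow-style naming. A minimal sketch of how they might be exercised through the gem's TensorFlow-like API (the `ts.floor_div`, `ts.mod`, `ts.squared_difference` and `ts.size` calls plus the session setup below are assumptions based on that naming convention, not taken from this diff):

    require 'tensor_stream'

    ts = TensorStream
    a  = ts.constant([[7.0, 8.0], [9.0, 10.0]])
    b  = ts.constant([[2.0, 3.0], [4.0, 5.0]])

    sess = ts.session
    sess.run(ts.floor_div(a, b))           # element-wise floored quotient
    sess.run(ts.mod(a, b))                 # element-wise remainder
    sess.run(ts.squared_difference(a, b))  # (a - b) * (a - b), element-wise
    sess.run(ts.size(a))                   # total element count => 4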
@@ -0,0 +1,36 @@
+ TensorStream::Evaluator::OpenclEvaluator
+ TensorStream::Evaluator::RubyEvaluator
+ model name : AMD Ryzen 3 1300X Quad-Core Processor
+ OpenCL device AMD Accelerated Parallel Processing Ellesmere
+ Rehearsal --------------------------------------------------------------
+ pure ruby ooo matmul : 1.480000 0.000000 1.480000 ( 1.486855)
+ opencl ooo matmul : 0.190000 0.130000 0.320000 ( 0.332605)
+ pure ruby softmax : 0.280000 0.000000 0.280000 ( 0.278398)
+ opencl softmax : 0.040000 0.020000 0.060000 ( 0.070980)
+ pure ruby matmul : 0.730000 0.000000 0.730000 ( 0.726565)
+ opencl matmul : 0.020000 0.010000 0.030000 ( 0.046762)
+ pure ruby : 2.550000 0.000000 2.550000 ( 2.544265)
+ opencl : 0.290000 0.020000 0.310000 ( 0.318674)
+ pure ruby single function: 0.370000 0.000000 0.370000 ( 0.374805)
+ opencl singlefunction: 0.190000 0.050000 0.240000 ( 0.239913)
+ pure ruby pow float: 0.090000 0.000000 0.090000 ( 0.093488)
+ opencl pow float: 0.100000 0.010000 0.110000 ( 0.110532)
+ pure ruby pow int: 0.030000 0.000000 0.030000 ( 0.022236)
+ opencl pow int: 0.090000 0.010000 0.100000 ( 0.111199)
+ ----------------------------------------------------- total: 6.700000sec
+
+ user system total real
+ pure ruby ooo matmul : 1.460000 0.000000 1.460000 ( 1.468597)
+ opencl ooo matmul : 0.040000 0.000000 0.040000 ( 0.053625)
+ pure ruby softmax : 0.280000 0.000000 0.280000 ( 0.280252)
+ opencl softmax : 0.020000 0.010000 0.030000 ( 0.043143)
+ pure ruby matmul : 0.700000 0.000000 0.700000 ( 0.703540)
+ opencl matmul : 0.030000 0.000000 0.030000 ( 0.037716)
+ pure ruby : 2.540000 0.000000 2.540000 ( 2.539661)
+ opencl : 0.150000 0.000000 0.150000 ( 0.164203)
+ pure ruby single function: 0.350000 0.000000 0.350000 ( 0.351883)
+ opencl singlefunction: 0.090000 0.010000 0.100000 ( 0.092359)
+ pure ruby pow float: 0.080000 0.000000 0.080000 ( 0.080484)
+ opencl pow float: 0.030000 0.000000 0.030000 ( 0.032691)
+ pure ruby pow int: 0.020000 0.000000 0.020000 ( 0.019487)
+ opencl pow int: 0.020000 0.000000 0.020000 ( 0.026782)
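The two-pass Rehearsal/real layout above is what Ruby's Benchmark.bmbm prints. A rough sketch of how such a comparison could be driven, assuming a session can be created per evaluator (the evaluator-selection argument to `session` is an assumption; only `Benchmark.bmbm` itself is standard library):

    require 'benchmark'
    require 'tensor_stream'

    ts = TensorStream
    a = ts.constant(Array.new(128) { Array.new(128) { rand } })
    b = ts.constant(Array.new(128) { Array.new(128) { rand } })
    product = ts.matmul(a, b)

    ruby_sess   = ts.session(:ruby_evaluator)    # assumed evaluator selector
    opencl_sess = ts.session(:opencl_evaluator)  # assumed evaluator selector

    Benchmark.bmbm do |x|
      x.report('pure ruby matmul :') { ruby_sess.run(product) }
      x.report('opencl matmul :')    { opencl_sess.run(product) }
    end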
@@ -0,0 +1,28 @@
+ module TensorStream
+   # Defines a TensorStream control flow op
+   class DynamicStitch < Operation
+     attr_accessor :ops
+
+     def initialize(flow_type, inputs, ops = nil, options = {})
+       setup_initial_state(options)
+
+       @operation = :"flow_#{flow_type}"
+       @inputs = inputs
+
+       @data_type = Tensor.detect_type(inputs[1])
+       @name = [@graph.get_name_scope, options[:name] || set_name].compact.join('/')
+       @ops = ops
+       @shape = TensorShape.new([inputs.size])
+       @graph.add_node(self)
+     end
+
+     def set_data_type(_passed_data_type)
+       :unknown
+     end
+
+     def run
+       eval
+     end
+   end
+ end
+
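dynamic_stitch interleaves several data tensors into a single tensor, placing each element at the position named by the matching index tensor. Assuming the op follows the TensorFlow semantics its name suggests (the `ts.dynamic_stitch` call below is an assumption, not shown in this diff), a tiny illustration:

    ts = TensorStream

    indices = [ts.constant([0, 2]), ts.constant([1, 3])]
    data    = [ts.constant([10, 30]), ts.constant([20, 40])]

    # 10 goes to position 0, 30 to position 2, 20 to 1, 40 to 3,
    # so the stitched result is [10, 20, 30, 40]
    ts.session.run(ts.dynamic_stitch(indices, data))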
@@ -79,7 +79,6 @@ module TensorStream
  @ops[op.to_sym] = { options: options, block: block }
  end
  else
-
  @ops[opcode.to_sym] = { options: options, block: block }
  end
  end
@@ -87,16 +86,24 @@ module TensorStream
  ##
  # gets all supported ops for this Evaluator class
  def self.ops
- @ops ||={}
+ @ops ||= {}
  end
 
  def invoke(tensor, execution_context)
+ return eval_tensor(tensor, execution_context) unless tensor.is_a?(Operation)
+
  if self.class.ops.key?(tensor.operation.to_sym)
  op = self.class.ops[tensor.operation.to_sym]
+
  op_options = op[:options]
  resolved_inputs = tensor.inputs.map do |i|
  next if i.nil?
- if @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
+
+ if i.is_a?(Array)
+ next i.collect { |sub_item| sub_item.is_a?(Tensor) ? invoke(sub_item, execution_context) : sub_item }
+ end
+
+ if !op_options[:noop] && @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
  cache_key = "#{tensor.graph.object_id}_#{i.name}:#{object_id}"
  next @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
 
@@ -116,6 +123,28 @@ module TensorStream
 
  protected
 
+ def get_broadcast_gradient_args(input_a, input_b)
+ return [[], []] if input_a == input_b
+
+ input_a_args = []
+ input_b_args = []
+
+ input_a = input_b.size.times.map { |i| i < input_a.size ? input_a[i] : nil }.reverse if input_a.size < input_b.size
+ input_b = input_a.size.times.map { |i| i < input_b.size ? input_b[i] : nil }.reverse if input_a.size > input_b.size
+
+ input_a.reverse.zip(input_b.reverse).each_with_index do |item, index|
+ a, b = item
+
+ if a.nil? || b && (a < b)
+ input_a_args << input_b.size - index - 1
+ elsif b.nil? || a && (a > b)
+ input_b_args << input_a.size - index - 1
+ end
+ end
+
+ [input_a_args.reverse, input_b_args.reverse]
+ end
+
  ##
  # converts from a ruby Buffer object to the evaluator's native buffer format
  def convert_from_buffer(tensor, result)
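The added get_broadcast_gradient_args walks both shapes from the trailing dimension and reports, for each operand, the axes along which its gradient has to be summed because broadcasting expanded it. Two illustrative calls (the shapes are example values chosen here):

    # [3, 1] broadcast against [3, 4]: the first operand was stretched along
    # axis 1, so only its gradient needs a reduction there.
    get_broadcast_gradient_args([3, 1], [3, 4])   # => [[1], []]

    # a shorter shape is left-padded before comparison, so every leading
    # broadcast axis ends up in its reduction list.
    get_broadcast_gradient_args([4], [2, 3, 4])   # => [[0, 1], []]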
@@ -0,0 +1,48 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ % fname = 'floor_div'
+ % result_t = c_dtype
+ // floor division, same-shape operands
+ __kernel void <%= fname%>_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[globalRow * N + globalCol]);
+ }
+
+ // floor division, tensor against scalar
+ __kernel void <%=fname%>_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[0]);
+   } else {
+     C[globalRow * N + globalCol] = (int)(B[0] / A[globalRow * N + globalCol]);
+   }
+ }
+
+ // floor division with broadcast
+ __kernel void <%= fname%>_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   int b_m_index = globalRow;
+   int b_n_index = globalCol;
+
+   if (b_m_index >= M2) {
+     b_m_index = b_m_index % M2;
+   }
+
+   if (b_n_index >= N2) {
+     b_n_index = b_n_index % N2;
+   }
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[b_m_index * N2 + b_n_index]);
+   } else {
+     C[globalRow * N + globalCol] = (int)(B[b_m_index * N2 + b_n_index] / A[globalRow * N + globalCol]);
+   }
+ }
@@ -0,0 +1,3 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ % op = operator_to_c('mod')
+ <%= render 'operand.cl', c_dtype: c_dtype, op: op, fname: 'mod', dtype: "#{a}_#{b}", result_t: c_dtype %>
@@ -0,0 +1,53 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ // squared difference, same-shape operands
+ __kernel void squared_difference_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+   <%= c_dtype %> x = A[globalRow * N + globalCol];
+   <%= c_dtype %> y = B[globalRow * N + globalCol];
+   C[globalRow * N + globalCol] = (x - y) * (x - y);
+ }
+
+ // squared difference, tensor against scalar
+ __kernel void squared_difference_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   <%= c_dtype %> x = A[globalRow * N + globalCol];
+   <%= c_dtype %> y = B[0];
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (x - y) * (x - y);
+   } else {
+     C[globalRow * N + globalCol] = (y - x) * (y - x);
+   }
+ }
+
+ // squared difference with broadcast
+ __kernel void squared_difference_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   int b_m_index = globalRow;
+   int b_n_index = globalCol;
+
+   if (b_m_index >= M2) {
+     b_m_index = b_m_index % M2;
+   }
+
+   if (b_n_index >= N2) {
+     b_n_index = b_n_index % N2;
+   }
+
+   <%= c_dtype %> x = A[globalRow * N + globalCol];
+   <%= c_dtype %> y = B[b_m_index * N2 + b_n_index];
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (x - y) * (x - y);
+   } else {
+     C[globalRow * N + globalCol] = (y - x) * (y - x);
+   }
+ }
@@ -25,8 +25,7 @@ module TensorStream
  op.command_queue.finish
  self.dirty = false
  end
-
- result = buffer.reshape(*shape.reverse).to_a
+ result = buffer.reshape(*shape.map { |s| s.to_i}.reverse).to_a
 
  if data_type == :boolean
  result = process_function_op(result, ->(a, _b) { a != 0 })
@@ -109,7 +109,9 @@ module TensorStream
  b
  end
  else
- return buffer if buffer.nil? || buffer.buffer.size.zero?
+ return buffer if buffer.nil?
+ return [] if buffer.buffer.nil?
+ return buffer if buffer.buffer.size.zero?
  _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: [buffer.op].compact)
  end
  _opencl_queue.finish
@@ -202,6 +204,7 @@ module TensorStream
  suffix = args.collect { |k,v| "#{k}.#{v}"}.join('.')
  @context[:_cache]["_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"] ||= begin
  filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
+ raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
  source = File.read(filename)
  source = OpenclTemplateHelper.new(source).generate(args)
  # File.write("/tmp/#{kernel}.#{suffix}.cl", source)
@@ -251,13 +254,13 @@ module TensorStream
  execute_func('log', tensor, inputs[0], context)
  end
 
- register_op :cond do |context, tensor, inputs|
+ register_op :cond, noop: true do |context, tensor, inputs|
  pred = complete_eval(tensor.options[:pred], context)
 
  if all_true?(pred.buffer)
- inputs[0]
+ complete_eval(inputs[0], context)
  else
- inputs[1]
+ complete_eval(inputs[1], context)
  end
  end
 
@@ -285,12 +288,20 @@ module TensorStream
  end
  end
 
- %i[max add div sub mul pow sigmoid_grad].each do |op|
+ %i[max add div sub mod mul pow sigmoid_grad squared_difference].each do |op|
  register_op op, noop: true do |context, tensor, inputs|
  execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context)
  end
  end
 
+ register_op :floor_div, noop: true do |context, tensor, inputs|
+ if fp_type?(tensor.data_type)
+ execute_2_operand_func('floor_div', tensor, inputs[0], inputs[1], context)
+ else
+ execute_2_operand_func('div', tensor, inputs[0], inputs[1], context)
+ end
+ end
+
  register_op :where, noop: true do |context, tensor, inputs|
  pred = tensor.options[:pred]
  execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
@@ -479,11 +490,12 @@ module TensorStream
  end
 
  register_op :broadcast_gradient_args, buffer: true do |_context, tensor, inputs|
- wrap_opencl(get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a), data_type: inputs[0].data_type, name: tensor.name)
+ rx, ry = get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a)
+ [ wrap_opencl(rx, data_type: :int32, name: "#{tensor.name}"), wrap_opencl(ry, data_type: :int32, name: "#{tensor.name}:1")]
  end
 
  register_op :shape do |_context, tensor, inputs|
- wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.options[:out_type] || :float32)
+ wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.data_type)
  end
 
  register_op :reshape, buffer: true do |_context, _tensor, inputs|
@@ -504,6 +516,10 @@ module TensorStream
  inputs
  end
 
+ register_op :size do |_context, tensor, inputs|
+ wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
+ end
+
  %i[sum mean].each do |op|
  register_op op, noop: true do |context, tensor, inputs|
  reduction(context, tensor, inputs[0], inputs[1], op.to_sym)
@@ -534,8 +550,9 @@ module TensorStream
  end
 
  def eval_operation(tensor, child_context)
- return @context[tensor.name] if @context.key?(tensor.name)
+
  cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
+ return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
  return @context[cache_key] if @context.key?(cache_key)
  # puts tensor.name
  invoke(tensor, child_context).tap do |result|
@@ -559,8 +576,8 @@ module TensorStream
  value: result
  }
  end
- @context[:_cache][cache_key] = @context[cache_key] if tensor.is_const
- @context[tensor.name] = result
+ @context[cache_key] = result
+ @context[:_cache][cache_key] = result if tensor.is_const
  end
  rescue EvaluatorExcecutionException => e
  raise e
@@ -628,6 +645,7 @@ module TensorStream
  a, b = auto_type_cast(a, b, name: "#{tensor.name}/cast_#{a.name}_#{b.data_type}")
  dtype = tensor.data_type
  result_shape = TensorShape.infer_shape(a.shape, b.shape)
+ return _create_result_buffer(dtype, [0], "out_#{tensor.name}") if result_shape == [0]
 
  output_buffer = _create_result_buffer(tensor.data_type, result_shape, "out_#{tensor.name}")
  a, b, prog, switch_operands = select_program(a, b, op_name)
@@ -799,8 +817,9 @@ module TensorStream
  end
 
  def _create_result_buffer(data_type, shape, name)
+ return OpenCLBuffer.new(data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
  @context[:_cache][:_cl_buffers]["_result_#{name}_#{shape.join('_')}:#{object_id}"] ||= begin
- size = shape.empty? ? 1 : shape.reduce(:*)
+ size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
  buffer = allocate_narray_for_type(data_type, size)
  cl_buffer = _opencl_context.create_buffer(buffer.size * buffer.element_size)
  OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
@@ -840,6 +859,17 @@ module TensorStream
  end
  end
 
+ def _reduced_shape(input_shape, axes)
+ return [] if axes.nil? # reduce to scalar
+ axes = [ axes ] unless axes.is_a?(Array)
+ return input_shape if axes.empty?
+
+ axes.each do |dimen|
+ input_shape[dimen] = 1
+ end
+ input_shape
+ end
+
  def reduction(child_context, tensor, a, b, func)
  input = complete_eval(a, child_context)
  axis = read_final_result(complete_eval(b, child_context))
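The added _reduced_shape keeps the input's rank but collapses every reduced axis to 1, which is the shape a keepdims-style reduction reports. A few illustrative calls (example shapes only):

    _reduced_shape([2, 3, 4], 1)      # => [2, 1, 4]
    _reduced_shape([2, 3, 4], [0, 2]) # => [1, 3, 1]
    _reduced_shape([2, 3, 4], nil)    # => []   (reduce to a scalar)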
@@ -853,7 +883,8 @@ module TensorStream
 
  if axis.is_a?(Array)
  axis.map{ |x| rank - x.abs }.sort.reverse.each do |x|
- value = value.send(func, x)
+
+ value = value.send(func, x.to_i)
  end
  else
  value = value.send(func, rank - axis.abs)
@@ -867,7 +898,7 @@ module TensorStream
  end
  end
 
  if tensor.options[:keepdims]
- new_shape = reduced_shape(input.shape.dup, axis)
+ new_shape = _reduced_shape(input.shape.dup, axis)
  end
  convert_to_opencl(value.flatten, new_shape, data_type: tensor.data_type, name: tensor.name)
@@ -948,17 +979,6 @@ module TensorStream
  shape.is_a?(Array) ? shape.size : 0
  end
 
- def get_broadcast_gradient_args(input_a, input_b)
- return [] if get_rank(input_b).zero? && get_rank(input_a).zero?
- return nil if get_rank(input_b).zero?
- # ruby scalar
- if get_rank(input_a).zero?
- _broadcast_gradient_op(input_b, input_a, 0, true)
- elsif get_rank(input_a) > 0
- _broadcast_gradient_op(input_a, input_b, 0)
- end
- end
-
  def concat_array(values, axis)
  combined_array = values.shift
  axis = get_rank(combined_array) - 1 if axis == -1