tensor_stream 0.5.1 → 0.6.0

This diff shows the contents of publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
-   metadata.gz: f14dd6388d5cdd10827cebde01a9cbca0686b653
-   data.tar.gz: d2ccba35defe6474a21bd75fcb09f8d49ce42e79
+ SHA256:
+   metadata.gz: 6d647cef8f32fa7b3c10460365adfc55ccdd9872e71d453df090986349b615f5
+   data.tar.gz: baa7be71775bc5d39396343b6d4c32943cf3b79d5a2e591c885bd6fc9314883e
  SHA512:
-   metadata.gz: 244026aae6ce13d8e932deada3c169b5320df517eb5dd7db5ea8c06c1cdedc9c9829d7f149261602f502c284ffe65ae831845016d9425250b0ad9d7d66fc6a0e
-   data.tar.gz: 91811c88a464604f5ca1e776f86d0342dc316ee016d920d40d8a228e2978f6a275783c5613a97cf21af1ba9256c951ad3db777b66fae20e5d1f8f9659f170301
+   metadata.gz: d3207ef919464e696d03fe7bbd264ba606565bf09d66796d461b687f047e8b2b969259bcffaf7524677d6b22398ba32025ca8f94c33756cda3a3bb37f535a902
+   data.tar.gz: 735dbd55e54237619bb9c6653818dade6df257fb71a58cc4abd540a3053d51d318d734ba149c3889bd6403a5219166c291534a66528b83e481ba1240e2696e49
data/CHANGELOG.md CHANGED
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+ ## [0.6.0] - 2018-07-21
+ ### Added
+ - [NEW OP] fill, floor_div, dynamic_stitch, mod, range, size, squared_difference
+
+ ### Fixes
+ - [General] Some auto-differentiation fixes
+ - [softmax_cross_entropy_with_logits_v2] Use a numerically stable way of calculating values
+ - Other fixes related to shape computation
+
  ## [0.5.1] - 2018-06-27
  ### Added
  - Added support for control_dependencies
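
The new 0.6.0 ops follow their TensorFlow namesakes. A minimal usage sketch (op names come from the changelog above; exact signatures in the released gem may differ slightly):

    require 'tensor_stream'

    ts = TensorStream
    a = ts.constant([[7.0, 3.0], [4.0, 9.0]])
    b = ts.constant([[2.0, 2.0], [3.0, 2.0]])

    sess = ts.session
    sess.run(ts.floor_div(a, b))          # element-wise quotient, fractional part dropped
    sess.run(ts.mod(a, b))                # element-wise remainder
    sess.run(ts.squared_difference(a, b)) # element-wise (a - b)**2
    sess.run(ts.fill([2, 2], 1.0))        # 2x2 tensor filled with 1.0
    sess.run(ts.range(0, 10, 2))          # [0, 2, 4, 6, 8]
    sess.run(ts.size(a))                  # total element count, here 4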
@@ -0,0 +1,36 @@
+ TensorStream::Evaluator::OpenclEvaluator
+ TensorStream::Evaluator::RubyEvaluator
+ model name : AMD Ryzen 3 1300X Quad-Core Processor
+ OpenCL device AMD Accelerated Parallel Processing Ellesmere
+ Rehearsal --------------------------------------------------------------
+ pure ruby ooo matmul     :   1.480000   0.000000   1.480000 (  1.486855)
+ opencl ooo matmul        :   0.190000   0.130000   0.320000 (  0.332605)
+ pure ruby softmax        :   0.280000   0.000000   0.280000 (  0.278398)
+ opencl softmax           :   0.040000   0.020000   0.060000 (  0.070980)
+ pure ruby matmul         :   0.730000   0.000000   0.730000 (  0.726565)
+ opencl matmul            :   0.020000   0.010000   0.030000 (  0.046762)
+ pure ruby                :   2.550000   0.000000   2.550000 (  2.544265)
+ opencl                   :   0.290000   0.020000   0.310000 (  0.318674)
+ pure ruby single function:   0.370000   0.000000   0.370000 (  0.374805)
+ opencl singlefunction    :   0.190000   0.050000   0.240000 (  0.239913)
+ pure ruby pow float      :   0.090000   0.000000   0.090000 (  0.093488)
+ opencl pow float         :   0.100000   0.010000   0.110000 (  0.110532)
+ pure ruby pow int        :   0.030000   0.000000   0.030000 (  0.022236)
+ opencl pow int           :   0.090000   0.010000   0.100000 (  0.111199)
+ ----------------------------------------------------- total: 6.700000sec
+
+                                user     system      total        real
+ pure ruby ooo matmul     :   1.460000   0.000000   1.460000 (  1.468597)
+ opencl ooo matmul        :   0.040000   0.000000   0.040000 (  0.053625)
+ pure ruby softmax        :   0.280000   0.000000   0.280000 (  0.280252)
+ opencl softmax           :   0.020000   0.010000   0.030000 (  0.043143)
+ pure ruby matmul         :   0.700000   0.000000   0.700000 (  0.703540)
+ opencl matmul            :   0.030000   0.000000   0.030000 (  0.037716)
+ pure ruby                :   2.540000   0.000000   2.540000 (  2.539661)
+ opencl                   :   0.150000   0.000000   0.150000 (  0.164203)
+ pure ruby single function:   0.350000   0.000000   0.350000 (  0.351883)
+ opencl singlefunction    :   0.090000   0.010000   0.100000 (  0.092359)
+ pure ruby pow float      :   0.080000   0.000000   0.080000 (  0.080484)
+ opencl pow float         :   0.030000   0.000000   0.030000 (  0.032691)
+ pure ruby pow int        :   0.020000   0.000000   0.020000 (  0.019487)
+ opencl pow int           :   0.020000   0.000000   0.020000 (  0.026782)
@@ -0,0 +1,28 @@
+ module TensorStream
+   # Defines a TensorStream control flow op
+   class DynamicStitch < Operation
+     attr_accessor :ops
+
+     def initialize(flow_type, inputs, ops = nil, options = {})
+       setup_initial_state(options)
+
+       @operation = :"flow_#{flow_type}"
+       @inputs = inputs
+
+       @data_type = Tensor.detect_type(inputs[1])
+       @name = [@graph.get_name_scope, options[:name] || set_name].compact.join('/')
+       @ops = ops
+       @shape = TensorShape.new([inputs.size])
+       @graph.add_node(self)
+     end
+
+     def set_data_type(_passed_data_type)
+       :unknown
+     end
+
+     def run
+       eval
+     end
+   end
+ end
+
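
The class above is only graph plumbing; the actual stitching happens in the evaluators. For reference, dynamic_stitch scatters each value of every data slice to the output position named by the matching index slice, as in this pure-Ruby sketch of the semantics (reference behavior only, not the gem's implementation):

    def dynamic_stitch_reference(indices, data)
      merged = []
      indices.each_with_index do |index_group, m|
        Array(index_group).each_with_index do |dest, i|
          merged[dest] = Array(data[m])[i] # last write wins on duplicate indices
        end
      end
      merged
    end

    dynamic_stitch_reference([[0, 2], [1, 3]], [[10, 30], [20, 40]])
    # => [10, 20, 30, 40]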
@@ -79,7 +79,6 @@ module TensorStream
        @ops[op.to_sym] = { options: options, block: block }
      end
    else
-
      @ops[opcode.to_sym] = { options: options, block: block }
    end
  end
@@ -87,16 +86,24 @@ module TensorStream
  ##
  # gets all supported ops for this Evaluator class
  def self.ops
-   @ops ||={}
+   @ops ||= {}
  end
 
  def invoke(tensor, execution_context)
+   return eval_tensor(tensor, execution_context) unless tensor.is_a?(Operation)
+
    if self.class.ops.key?(tensor.operation.to_sym)
      op = self.class.ops[tensor.operation.to_sym]
+
      op_options = op[:options]
      resolved_inputs = tensor.inputs.map do |i|
        next if i.nil?
-       if @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
+
+       if i.is_a?(Array)
+         next i.collect { |sub_item| sub_item.is_a?(Tensor) ? invoke(sub_item, execution_context) : sub_item }
+       end
+
+       if !op_options[:noop] && @context[:_cache][:placement][tensor.name] != @context[:_cache][:placement][i.name] # tensor is on another device or evaluator
          cache_key = "#{tensor.graph.object_id}_#{i.name}:#{object_id}"
          next @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
 
@@ -116,6 +123,28 @@ module TensorStream
 
  protected
 
+ def get_broadcast_gradient_args(input_a, input_b)
+   return [[], []] if input_a == input_b
+
+   input_a_args = []
+   input_b_args = []
+
+   input_a = input_b.size.times.map { |i| i < input_a.size ? input_a[i] : nil }.reverse if input_a.size < input_b.size
+   input_b = input_a.size.times.map { |i| i < input_b.size ? input_b[i] : nil }.reverse if input_a.size > input_b.size
+
+   input_a.reverse.zip(input_b.reverse).each_with_index do |item, index|
+     a, b = item
+
+     if a.nil? || b && (a < b)
+       input_a_args << input_b.size - index - 1
+     elsif b.nil? || a && (a > b)
+       input_b_args << input_a.size - index - 1
+     end
+   end
+
+   [input_a_args.reverse, input_b_args.reverse]
+ end
+
  ##
  # converts from a ruby Buffer object to the evaluator's native buffer format
  def convert_from_buffer(tensor, result)
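
For each input shape, the helper above returns the axes of the broadcast result along which that input was expanded, so gradient code knows where to sum. Worked input/output pairs for the method as written (it is protected, so shown as plain pairs rather than a runnable call site):

    get_broadcast_gradient_args([3, 1], [3, 4])  # => [[1], []]  first shape broadcast along axis 1
    get_broadcast_gradient_args([4], [2, 4])     # => [[0], []]  first shape gained a leading axis
    get_broadcast_gradient_args([2, 4], [2, 4])  # => [[], []]   identical shapes, nothing to reduce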
@@ -0,0 +1,48 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ % fname = 'floor_div'
+ % result_t = c_dtype
+ // same-dimension floor_div op
+ __kernel void <%= fname %>_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[globalRow * N + globalCol]);
+ }
+
+ // 1D + scalar floor_div op
+ __kernel void <%= fname %>_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[0]);
+   } else {
+     C[globalRow * N + globalCol] = (int)(B[0] / A[globalRow * N + globalCol]);
+   }
+ }
+
+ // 1D + scalar floor_div op, broadcast
+ __kernel void <%= fname %>_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= result_t %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   int b_m_index = globalRow;
+   int b_n_index = globalCol;
+
+   if (b_m_index >= M2) {
+     b_m_index = b_m_index % M2;
+   }
+
+   if (b_n_index >= N2) {
+     b_n_index = b_n_index % N2;
+   }
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (int)(A[globalRow * N + globalCol] / B[b_m_index * N2 + b_n_index]);
+   } else {
+     C[globalRow * N + globalCol] = (int)(B[b_m_index * N2 + b_n_index] / A[globalRow * N + globalCol]);
+   }
+ }
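
One caveat worth noting: the (int) cast in these kernels truncates toward zero rather than flooring, so for negative quotients the result differs from a mathematical floor. The per-element semantics, expressed in Ruby:

    def floor_div_kernel_semantics(a, b)
      (a / b).to_i # Float#to_i also truncates toward zero, matching the C-style cast
    end

    floor_div_kernel_semantics(7.0, 2.0)  # => 3
    floor_div_kernel_semantics(-7.0, 2.0) # => -3 (a true floor would give -4)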
@@ -0,0 +1,3 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ % op = operator_to_c('mod')
+ <%= render 'operand.cl', c_dtype: c_dtype, op: op, fname: 'mod', dtype: "#{a}_#{b}", result_t: c_dtype %>
@@ -0,0 +1,53 @@
+ % c_dtype = dtype_to_c_type(dtype)
+ // same-dimension squared_difference op
+ __kernel void squared_difference_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+   <%= c_dtype %> x = A[globalRow * N + globalCol];
+   <%= c_dtype %> y = B[globalRow * N + globalCol];
+   C[globalRow * N + globalCol] = (x - y) * (x - y);
+ }
+
+ // 1D + scalar squared_difference op
+ __kernel void squared_difference_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   <%= c_dtype %> x = A[globalRow * N + globalCol];
+   <%= c_dtype %> y = B[0];
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (x - y) * (x - y);
+   } else {
+     C[globalRow * N + globalCol] = (y - x) * (y - x);
+   }
+ }
+
+ // 1D + scalar squared_difference op, broadcast
+ __kernel void squared_difference_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
+   // Get the index of the current element to be processed
+   const int globalRow = get_global_id(0); // Row ID of C (0..M)
+   const int globalCol = get_global_id(1); // Col ID of C (0..N)
+
+   int b_m_index = globalRow;
+   int b_n_index = globalCol;
+
+   if (b_m_index >= M2) {
+     b_m_index = b_m_index % M2;
+   }
+
+   if (b_n_index >= N2) {
+     b_n_index = b_n_index % N2;
+   }
+
+   <%= c_dtype %> x = A[globalRow * N + globalCol];
+   <%= c_dtype %> y = B[b_m_index * N2 + b_n_index];
+
+   if (switch_op == 0) {
+     C[globalRow * N + globalCol] = (x - y) * (x - y);
+   } else {
+     C[globalRow * N + globalCol] = (y - x) * (y - x);
+   }
+ }
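
Because (x - y)^2 equals (y - x)^2, the two switch_op branches above compute the same value; the operand-order flag only matters for non-commutative kernels such as floor_div. Per-element semantics in Ruby:

    def squared_difference(x, y)
      (x - y)**2
    end

    squared_difference(3.0, 5.0) # => 4.0, same as squared_difference(5.0, 3.0)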
@@ -25,8 +25,7 @@ module TensorStream
    op.command_queue.finish
    self.dirty = false
  end
-
- result = buffer.reshape(*shape.reverse).to_a
+ result = buffer.reshape(*shape.map { |s| s.to_i }.reverse).to_a
 
  if data_type == :boolean
    result = process_function_op(result, ->(a, _b) { a != 0 })
@@ -109,7 +109,9 @@ module TensorStream
      b
    end
  else
-   return buffer if buffer.nil? || buffer.buffer.size.zero?
+   return buffer if buffer.nil?
+   return [] if buffer.buffer.nil?
+   return buffer if buffer.buffer.size.zero?
    _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: [buffer.op].compact)
  end
  _opencl_queue.finish
@@ -202,6 +204,7 @@ module TensorStream
  suffix = args.collect { |k,v| "#{k}.#{v}"}.join('.')
  @context[:_cache]["_opencl_kernel_#{kernel}.#{suffix}:#{object_id}"] ||= begin
    filename = %w[cl.erb cl].map { |ext| cl_template_path(kernel, ext) }.find { |n| File.exist?(n) }
+   raise "opencl kernel template for #{kernel} has not yet been defined" if filename.nil?
    source = File.read(filename)
    source = OpenclTemplateHelper.new(source).generate(args)
    # File.write("/tmp/#{kernel}.#{suffix}.cl", source)
@@ -251,13 +254,13 @@ module TensorStream
    execute_func('log', tensor, inputs[0], context)
  end
 
- register_op :cond do |context, tensor, inputs|
+ register_op :cond, noop: true do |context, tensor, inputs|
    pred = complete_eval(tensor.options[:pred], context)
 
    if all_true?(pred.buffer)
-     inputs[0]
+     complete_eval(inputs[0], context)
    else
-     inputs[1]
+     complete_eval(inputs[1], context)
    end
  end
 
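Registering :cond as a noop means its branch inputs now arrive unevaluated; the block forces only the branch selected by pred via complete_eval, so the untaken branch never runs. A usage sketch, assuming a TensorFlow-style cond(pred, if_true, if_false) front-end (the front-end signature is not shown in this diff):

    x = ts.constant(2.0)
    y = ts.constant(3.0)
    out = ts.cond(x < y, x + 1.0, y * 2.0)
    ts.session.run(out) # only the x + 1.0 branch is evaluated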
@@ -285,12 +288,20 @@ module TensorStream
    end
  end
 
- %i[max add div sub mul pow sigmoid_grad].each do |op|
+ %i[max add div sub mod mul pow sigmoid_grad squared_difference].each do |op|
    register_op op, noop: true do |context, tensor, inputs|
      execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context)
    end
  end
 
+ register_op :floor_div, noop: true do |context, tensor, inputs|
+   if fp_type?(tensor.data_type)
+     execute_2_operand_func('floor_div', tensor, inputs[0], inputs[1], context)
+   else
+     execute_2_operand_func('div', tensor, inputs[0], inputs[1], context)
+   end
+ end
+
  register_op :where, noop: true do |context, tensor, inputs|
    pred = tensor.options[:pred]
    execute_cond_func('where', tensor, pred, inputs[0], inputs[1], context)
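
Note the fallback in the floor_div registration above: integer tensors reuse the plain div kernel, since integer division already discards the fractional part; only floating-point dtypes need the dedicated kernel. In plain Ruby terms:

    7 / 2     # => 3   (Integer#/ already truncates)
    7.0 / 2.0 # => 3.5 (floats need the floor_div kernel to produce 3)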
@@ -479,11 +490,12 @@ module TensorStream
  end
 
  register_op :broadcast_gradient_args, buffer: true do |_context, tensor, inputs|
-   wrap_opencl(get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a), data_type: inputs[0].data_type, name: tensor.name)
+   rx, ry = get_broadcast_gradient_args(inputs[0].buffer.to_a, inputs[1].buffer.to_a)
+   [wrap_opencl(rx, data_type: :int32, name: "#{tensor.name}"), wrap_opencl(ry, data_type: :int32, name: "#{tensor.name}:1")]
  end
 
  register_op :shape do |_context, tensor, inputs|
-   wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.options[:out_type] || :float32)
+   wrap_opencl(inputs[0].shape, name: tensor.name, data_type: tensor.data_type)
  end
 
  register_op :reshape, buffer: true do |_context, _tensor, inputs|
@@ -504,6 +516,10 @@ module TensorStream
    inputs
  end
 
+ register_op :size do |_context, tensor, inputs|
+   wrap_opencl(inputs[0].buffer.size, name: tensor.name, data_type: tensor.options[:out_type] || :int32)
+ end
+
  %i[sum mean].each do |op|
    register_op op, noop: true do |context, tensor, inputs|
      reduction(context, tensor, inputs[0], inputs[1], op.to_sym)
@@ -534,8 +550,9 @@ module TensorStream
  end
 
  def eval_operation(tensor, child_context)
-   return @context[tensor.name] if @context.key?(tensor.name)
+
    cache_key = "#{tensor.graph.object_id}_opencl_#{tensor.name}:#{object_id}"
+   return @context[:_cache][cache_key] if @context[:_cache].key?(cache_key)
    return @context[cache_key] if @context.key?(cache_key)
    # puts tensor.name
    invoke(tensor, child_context).tap do |result|
@@ -559,8 +576,8 @@ module TensorStream
        value: result
      }
    end
-   @context[:_cache][cache_key] = @context[cache_key] if tensor.is_const
-   @context[tensor.name] = result
+   @context[cache_key] = result
+   @context[:_cache][cache_key] = result if tensor.is_const
  end
  rescue EvaluatorExcecutionException => e
    raise e
@@ -628,6 +645,7 @@ module TensorStream
  a, b = auto_type_cast(a, b, name: "#{tensor.name}/cast_#{a.name}_#{b.data_type}")
  dtype = tensor.data_type
  result_shape = TensorShape.infer_shape(a.shape, b.shape)
+ return _create_result_buffer(dtype, [0], "out_#{tensor.name}") if result_shape == [0]
 
  output_buffer = _create_result_buffer(tensor.data_type, result_shape, "out_#{tensor.name}")
  a, b, prog, switch_operands = select_program(a, b, op_name)
@@ -799,8 +817,9 @@ module TensorStream
  end
 
  def _create_result_buffer(data_type, shape, name)
+   return OpenCLBuffer.new(data_type: data_type, shape: [0], buffer: nil, cl_buffer: nil) if shape == [0]
    @context[:_cache][:_cl_buffers]["_result_#{name}_#{shape.join('_')}:#{object_id}"] ||= begin
-     size = shape.empty? ? 1 : shape.reduce(:*)
+     size = shape.empty? || shape == [0] ? 1 : shape.reduce(:*)
      buffer = allocate_narray_for_type(data_type, size)
      cl_buffer = _opencl_context.create_buffer(buffer.size * buffer.element_size)
      OpenCLBuffer.new(data_type: data_type, shape: shape, buffer: buffer, cl_buffer: cl_buffer)
@@ -840,6 +859,17 @@ module TensorStream
    end
  end
 
+ def _reduced_shape(input_shape, axes)
+   return [] if axes.nil? # reduce to scalar
+   axes = [axes] unless axes.is_a?(Array)
+   return input_shape if axes.empty?
+
+   axes.each do |dimen|
+     input_shape[dimen] = 1
+   end
+   input_shape
+ end
+
  def reduction(child_context, tensor, a, b, func)
    input = complete_eval(a, child_context)
    axis = read_final_result(complete_eval(b, child_context))
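
_reduced_shape above implements keepdims semantics: each reduced axis collapses to size 1 instead of disappearing. Worked examples for the method as written:

    _reduced_shape([2, 3, 4], 1)      # => [2, 1, 4]
    _reduced_shape([2, 3, 4], [0, 2]) # => [1, 3, 1]
    _reduced_shape([2, 3, 4], nil)    # => []  (reduce to scalar)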
@@ -853,7 +883,8 @@ module TensorStream
 
  if axis.is_a?(Array)
    axis.map { |x| rank - x.abs }.sort.reverse.each do |x|
-     value = value.send(func, x)
+
+     value = value.send(func, x.to_i)
    end
  else
    value = value.send(func, rank - axis.abs)
@@ -867,7 +898,7 @@ module TensorStream
  end
 
  if tensor.options[:keepdims]
-   new_shape = reduced_shape(input.shape.dup, axis)
+   new_shape = _reduced_shape(input.shape.dup, axis)
  end
 
  convert_to_opencl(value.flatten, new_shape, data_type: tensor.data_type, name: tensor.name)
@@ -948,17 +979,6 @@ module TensorStream
    shape.is_a?(Array) ? shape.size : 0
  end
 
- def get_broadcast_gradient_args(input_a, input_b)
-   return [] if get_rank(input_b).zero? && get_rank(input_a).zero?
-   return nil if get_rank(input_b).zero?
-   # ruby scalar
-   if get_rank(input_a).zero?
-     _broadcast_gradient_op(input_b, input_a, 0, true)
-   elsif get_rank(input_a) > 0
-     _broadcast_gradient_op(input_a, input_b, 0)
-   end
- end
-
  def concat_array(values, axis)
    combined_array = values.shift
    axis = get_rank(combined_array) - 1 if axis == -1